ezyang/gist:9fe499080ac4335fed8eea7c97665fe2 Secret

## gistfile0.txt
Running torchbench.py BERT_pytorch...
cuda train BERT_pytorch                       PASS
Running torchbench.py Background_Matting...
[2022-11-19 14:31:47,720] torch._inductor.graph: [WARNING] Creating implicit fallback for:
  target: <function floor at 0x7f67b1899ab0>
  args[0]: 256.0
[2022-11-19 14:31:47,721] torch._dynamo.optimizations.backends: [ERROR] aot_autograd error
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 254, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/lowering.py", line 225, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/lowering.py", line 1033, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/ir.py", line 3002, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/ir.py", line 2396, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/_symbolic_trace.py", line 813, in wrapped
    return orig_fn(*args, **kwargs)
TypeError: math.floor() takes exactly one argument (0 given)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 321, in run_node
    result = super().run_node(n)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 257, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: TypeError: math.floor() takes exactly one argument (0 given)
  target: <function floor at 0x7f67b1899ab0>
  args[0]: 256.0

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 53, in inner
    return fn(model, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 563, in aot_autograd
    return aot_module_simplified(subgraph.model, subgraph.example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1503, in aot_module_simplified
    aot_dispatcher_function = _create_aot_dispatcher_function(functional_call, joined_args, aot_config, fake_mode)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1191, in _create_aot_dispatcher_function
    aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 941, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args_with_views_handled)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 364, in fw_compiler
    return compile_fx_inner(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/debug_utils.py", line 473, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/debug.py", line 177, in inner
    return fn(*args, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 122, in compile_fx_inner
    graph.run(*example_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 140, in run
    return super().run(*args)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 132, in run
    msg = f"While executing {node.format_node(detailed=True)}"
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 476, in format_node
    f'args = {_format_arg(self.args, detailed=detailed)}, kwargs = {_format_arg(self.kwargs, detailed=detailed)})'
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 85, in _format_arg
    items = ', '.join(_format_arg(a, detailed=detailed) for idx, a in enumerate(arg) if idx < max_list_len)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 85, in <genexpr>
    items = ', '.join(_format_arg(a, detailed=detailed) for idx, a in enumerate(arg) if idx < max_list_len)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 97, in _format_arg
    return f"%{arg} : Tensor[size={list(a.size())}, stride={list(a.stride())}]"
AttributeError: 'SymFloat' object has no attribute 'size'
ERROR:common:compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 472, in call_user_compiler
    assert callable(compiled_fn), "compiler_fn did not return callable"
AssertionError: compiler_fn did not return callable

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1679, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 386, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 432, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 475, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
cuda train Background_Matting                 FAIL
WARNING:root:DALLE2_pytorch failed to load
Eager model failed to run
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1013, in validate_model
    self.model_iter_fn(model, example_inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 338, in forward_and_backward_pass
    self.grad_scaler.scale(loss).backward()
  File "/scratch/ezyang/work/b/pytorch/torch/_tensor.py", line 473, in backward
    torch.autograd.backward(
  File "/scratch/ezyang/work/b/pytorch/torch/autograd/__init__.py", line 197, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1961, in run
    device, name, model, example_inputs, batch_size = runner.load_model(
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 283, in load_model
    self.validate_model(model, example_inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1015, in validate_model
    raise NotImplementedError("Eager model failed to run")
NotImplementedError: Eager model failed to run

Running torchbench.py LearningToPaint...
cuda train LearningToPaint                    PASS
Running torchbench.py Super_SloMo...
[2022-11-19 14:41:29,888] torch._inductor.graph: [WARNING] Creating implicit fallback for:
  target: <function floor at 0x7f2c7031f7f0>
  args[0]: 22.0
[2022-11-19 14:41:29,892] torch._dynamo.optimizations.backends: [ERROR] aot_autograd error
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 254, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/lowering.py", line 225, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/lowering.py", line 1033, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/ir.py", line 3002, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/ir.py", line 2396, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/_symbolic_trace.py", line 813, in wrapped
    return orig_fn(*args, **kwargs)
TypeError: math.floor() takes exactly one argument (0 given)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 321, in run_node
    result = super().run_node(n)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 257, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: TypeError: math.floor() takes exactly one argument (0 given)
  target: <function floor at 0x7f2c7031f7f0>
  args[0]: 22.0

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 53, in inner
    return fn(model, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 563, in aot_autograd
    return aot_module_simplified(subgraph.model, subgraph.example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1503, in aot_module_simplified
    aot_dispatcher_function = _create_aot_dispatcher_function(functional_call, joined_args, aot_config, fake_mode)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1191, in _create_aot_dispatcher_function
    aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 941, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args_with_views_handled)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 364, in fw_compiler
    return compile_fx_inner(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/debug_utils.py", line 473, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/debug.py", line 177, in inner
    return fn(*args, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 122, in compile_fx_inner
    graph.run(*example_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 140, in run
    return super().run(*args)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 132, in run
    msg = f"While executing {node.format_node(detailed=True)}"
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 476, in format_node
    f'args = {_format_arg(self.args, detailed=detailed)}, kwargs = {_format_arg(self.kwargs, detailed=detailed)})'
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 85, in _format_arg
    items = ', '.join(_format_arg(a, detailed=detailed) for idx, a in enumerate(arg) if idx < max_list_len)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 85, in <genexpr>
    items = ', '.join(_format_arg(a, detailed=detailed) for idx, a in enumerate(arg) if idx < max_list_len)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 97, in _format_arg
    return f"%{arg} : Tensor[size={list(a.size())}, stride={list(a.stride())}]"
AttributeError: 'SymFloat' object has no attribute 'size'
ERROR:common:compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 472, in call_user_compiler
    assert callable(compiled_fn), "compiler_fn did not return callable"
AssertionError: compiler_fn did not return callable

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1679, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 386, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 432, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 475, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
cuda train Super_SloMo                        FAIL
Running torchbench.py alexnet...
[2022-11-19 14:41:44,063] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._adaptive_avg_pool2d_backward
[2022-11-19 14:41:47,408] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._adaptive_avg_pool2d_backward
cuda train alexnet                            PASS
Running torchbench.py attention_is_all_you_need_pytorch...
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 338, in <graph break in forward_and_backward_pass>
    self.grad_scaler.scale(loss).backward()
  File "/scratch/ezyang/work/b/pytorch/torch/_tensor.py", line 473, in backward
    torch.autograd.backward(
  File "/scratch/ezyang/work/b/pytorch/torch/autograd/__init__.py", line 197, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  File "/scratch/ezyang/work/b/pytorch/torch/autograd/function.py", line 270, in apply
    return user_fn(self, *args)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1037, in backward
    out = call_func_with_args(
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 595, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 194, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/sf/csfdln3folbnsqnmloidoo6f5dgqld3ypa2ionoif3nputjq5kmn.py", line 57, in call
    return (as_strided(buf1, (9521, 512), (512, 1)), as_strided(buf2, (s0, s1, 512), (512*s1, 512, 1)), )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
cuda train attention_is_all_you_need_pytorch  FAIL
Running torchbench.py dcgan...
cuda train dcgan                              PASS
Running torchbench.py densenet121...
cuda train densenet121                        PASS
WARNING:root:detectron2_fcos_r_50_fpn failed to load
FCOS train is not supported by upstream detectron2. See GH Issue: https://github.com/facebookresearch/detectron2/issues/4369.
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1961, in run
    device, name, model, example_inputs, batch_size = runner.load_model(
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 252, in load_model
    benchmark = benchmark_cls(
  File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/util/model.py", line 18, in __call__
    obj = type.__call__(cls, *args, **kwargs)
  File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/models/detectron2_fcos_r_50_fpn/__init__.py", line 15, in __init__
    super().__init__(variant="COCO-Detection/fcos_R_50_FPN_1x.py", test=test, device=device,
  File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/util/framework/detectron2/model_factory.py", line 100, in __init__
    loader = self.setup_train(cfg, args)
  File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/util/framework/detectron2/model_factory.py", line 110, in setup_train
    raise NotImplementedError("FCOS train is not supported by upstream detectron2. " \
NotImplementedError: FCOS train is not supported by upstream detectron2. See GH Issue: https://github.com/facebookresearch/detectron2/issues/4369.

WARNING:root:detectron2_maskrcnn_r_50_c4 failed to load
Eager model failed to run
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1013, in validate_model
    self.model_iter_fn(model, example_inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 337, in forward_and_backward_pass
    loss = self.compute_loss(pred)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 327, in compute_loss
    return reduce_to_scalar_loss(pred)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/testing.py", line 99, in reduce_to_scalar_loss
    return sum([reduce_to_scalar_loss(x) for x in out]) / len(out)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/testing.py", line 99, in <listcomp>
    return sum([reduce_to_scalar_loss(x) for x in out]) / len(out)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/testing.py", line 109, in reduce_to_scalar_loss
    return sum([reduce_to_scalar_loss(value) for value in out.values()]) / len(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/testing.py", line 109, in <listcomp>
    return sum([reduce_to_scalar_loss(value) for value in out.values()]) / len(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/testing.py", line 114, in reduce_to_scalar_loss
    raise NotImplementedError("Don't know how to reduce", type(out))
NotImplementedError: ("Don't know how to reduce", <class 'detectron2.structures.instances.Instances'>)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1961, in run
    device, name, model, example_inputs, batch_size = runner.load_model(
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 283, in load_model
    self.validate_model(model, example_inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1015, in validate_model
    raise NotImplementedError("Eager model failed to run")
NotImplementedError: Eager model failed to run

Running torchbench.py dlrm...
[2022-11-19 14:46:11,489] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
[2022-11-19 14:46:11,493] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
[2022-11-19 14:46:11,496] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
[2022-11-19 14:46:11,500] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
[2022-11-19 14:46:11,504] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
[2022-11-19 14:46:11,507] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
[2022-11-19 14:46:11,511] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
[2022-11-19 14:46:11,515] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
[2022-11-19 14:46:12,205] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._sparse_coo_tensor_with_dims_and_tensors
[2022-11-19 14:46:12,213] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._sparse_coo_tensor_with_dims_and_tensors
[2022-11-19 14:46:12,220] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._sparse_coo_tensor_with_dims_and_tensors
[2022-11-19 14:46:12,227] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._sparse_coo_tensor_with_dims_and_tensors
[2022-11-19 14:46:12,235] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._sparse_coo_tensor_with_dims_and_tensors
[2022-11-19 14:46:12,242] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._sparse_coo_tensor_with_dims_and_tensors
[2022-11-19 14:46:12,249] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._sparse_coo_tensor_with_dims_and_tensors
[2022-11-19 14:46:12,256] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._sparse_coo_tensor_with_dims_and_tensors
cuda train dlrm                               PASS
/scratch/ezyang/work/b/pytorch/torch/utils/tensorboard/__init__.py:4: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
  if not hasattr(tensorboard, "__version__") or LooseVersion(
/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/gym/core.py:317: DeprecationWarning: [33mWARN: Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.[0m
  deprecation(
Running torchbench.py drq...
cuda train drq                                PASS
Running torchbench.py fastNLP_Bert...
[2022-11-19 14:46:39,176] torch._inductor.ir: [WARNING] DeviceCopy
[2022-11-19 14:47:12,474] torch._dynamo.optimizations.backends: [ERROR] aot_autograd error
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 53, in inner
    return fn(model, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 563, in aot_autograd
    return aot_module_simplified(subgraph.model, subgraph.example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1503, in aot_module_simplified
    aot_dispatcher_function = _create_aot_dispatcher_function(functional_call, joined_args, aot_config, fake_mode)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1191, in _create_aot_dispatcher_function
    aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 925, in aot_dispatch_autograd
    fw_module, bw_module = aot_config.partition_fn(fx_g, joint_inputs, num_fwd_outputs=_num_mutated_data_inputs + _num_outs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/partitioners.py", line 428, in min_cut_rematerialization_partition
    cut_value, partition = nx.minimum_cut(nx_graph, "source", "sink")
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/networkx-3.0rc1-py3.10.egg/networkx/algorithms/flow/maxflow.py", line 450, in minimum_cut
    R = flow_func(flowG, _s, _t, capacity=capacity, value_only=True, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/networkx-3.0rc1-py3.10.egg/networkx/algorithms/flow/preflowpush.py", line 421, in preflow_push
    R = preflow_push_impl(G, s, t, capacity, residual, global_relabel_freq, value_only)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/networkx-3.0rc1-py3.10.egg/networkx/algorithms/flow/preflowpush.py", line 27, in preflow_push_impl
    raise nx.NetworkXError(f"node {str(t)} not in graph")
networkx.exception.NetworkXError: node sink not in graph
ERROR:common:compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 472, in call_user_compiler
    assert callable(compiled_fn), "compiler_fn did not return callable"
AssertionError: compiler_fn did not return callable

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/fastNLP/models/bert.py", line 265, in forward
    sequence_output = self.bert(words)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/fastNLP/embeddings/bert_embedding.py", line 137, in forward
    outputs = self.model(words)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/fastNLP/embeddings/bert_embedding.py", line 140, in <graph break in forward>
    return self.dropout(outputs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1679, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 386, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 432, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 475, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
cuda train fastNLP_Bert                       FAIL
Running torchbench.py functorch_dp_cifar10...
[2022-11-19 14:47:37,014] torch._dynamo.optimizations.backends: [ERROR] aot_autograd error
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 53, in inner
    return fn(model, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 563, in aot_autograd
    return aot_module_simplified(subgraph.model, subgraph.example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1503, in aot_module_simplified
    aot_dispatcher_function = _create_aot_dispatcher_function(functional_call, joined_args, aot_config, fake_mode)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1191, in _create_aot_dispatcher_function
    aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 941, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args_with_views_handled)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 364, in fw_compiler
    return compile_fx_inner(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/debug_utils.py", line 473, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/debug.py", line 177, in inner
    return fn(*args, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 122, in compile_fx_inner
    graph.run(*example_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 140, in run
    return super().run(*args)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 321, in run_node
    result = super().run_node(n)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 282, in output
    assert all(
AssertionError: [TensorBox(StorageBox(
  MatrixMultiplyAdd(
    name=buf111,
    layout=FlexibleLayout('cuda', torch.float32, size=[2, 1000], stride=[1000, 1]),
    inputs=[InputBuffer(name='primals_62', layout=FixedLayout('cuda', torch.float32, size=[1000], stride=[1])), ReinterpretView(
      StorageBox(
        ComputedBuffer(name='buf110', layout=FixedLayout('cuda', torch.float32, size=[2, 512, 1, 1], stride=[512, 1, 1, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf109, i1 + 512 * i0) / index_expr(1, torch.float32),
          ranges=[2, 512, 1, 1],
          origins={mul_56, unsqueeze_116, sub_19, add_46, view_37, add_47, sqrt_19, sub_18, unsqueeze_109, unsqueeze_118, var_mean_18, unsqueeze_113, primals_58, primals_57, unsqueeze_108, primals_60, convolution_18, unsqueeze_114, unsqueeze_119, primals_59, convolution_19, add_45, unsqueeze_111, unsqueeze_110, var_mean_19, mean, mul_55, reciprocal_19, relu_14, view_39, relu_16, add_43, relu_15, primals_55, unsqueeze_112, mul_58, mul_59, reciprocal_18, view_36, add_44, unsqueeze_117, view_38, primals_56, unsqueeze_115, sqrt_18}
        ))
      ),
      FixedLayout('cuda', torch.float32, size=(2, 512), stride=[512, 1]),
      no origins?
    ), ReinterpretView(
      StorageBox(
        InputBuffer(name='primals_61', layout=FixedLayout('cuda', torch.float32, size=[1000, 512], stride=[512, 1]))
      ),
      FixedLayout('cuda', torch.float32, size=[512, 1000], stride=[1, 512]),
      no origins?
    )],
    constant_args=(),
    kwargs={'beta': 1, 'alpha': 1},
    output_view=None,
    origins={mul_56, unsqueeze_116, sub_19, add_46, view_37, add_47, sqrt_19, sub_18, primals_62, unsqueeze_109, unsqueeze_118, var_mean_18, unsqueeze_113, primals_58, primals_57, permute, unsqueeze_108, primals_60, convolution_18, unsqueeze_114, unsqueeze_119, primals_59, convolution_19, add_45, view_40, unsqueeze_111, unsqueeze_110, var_mean_19, mean, mul_55, addmm, reciprocal_19, relu_14, view_39, relu_16, add_43, relu_15, primals_55, unsqueeze_112, mul_58, mul_59, reciprocal_18, view_36, add_44, primals_61, unsqueeze_117, view_38, primals_56, unsqueeze_115, sqrt_18}
  )
)), TensorBox(StorageBox(
  InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[64, 3, 7, 7], stride=[147, 49, 7, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_2', layout=FixedLayout('cuda', torch.float32, size=[64], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_4', layout=FixedLayout('cuda', torch.float32, size=[64, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_5', layout=FixedLayout('cuda', torch.float32, size=[64], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[64, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_8', layout=FixedLayout('cuda', torch.float32, size=[64], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_10', layout=FixedLayout('cuda', torch.float32, size=[64, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_11', layout=FixedLayout('cuda', torch.float32, size=[64], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[64, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_14', layout=FixedLayout('cuda', torch.float32, size=[64], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_16', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_17', layout=FixedLayout('cuda', torch.float32, size=[128], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[128, 128, 3, 3], stride=[1152, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_20', layout=FixedLayout('cuda', torch.float32, size=[128], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_22', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 1, 1], stride=[64, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_23', layout=FixedLayout('cuda', torch.float32, size=[128], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[128, 128, 3, 3], stride=[1152, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_26', layout=FixedLayout('cuda', torch.float32, size=[128], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_28', layout=FixedLayout('cuda', torch.float32, size=[128, 128, 3, 3], stride=[1152, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_29', layout=FixedLayout('cuda', torch.float32, size=[128], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 3, 3], stride=[1152, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_32', layout=FixedLayout('cuda', torch.float32, size=[256], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_34', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_35', layout=FixedLayout('cuda', torch.float32, size=[256], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 1, 1], stride=[128, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_38', layout=FixedLayout('cuda', torch.float32, size=[256], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_40', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_41', layout=FixedLayout('cuda', torch.float32, size=[256], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_44', layout=FixedLayout('cuda', torch.float32, size=[256], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_46', layout=FixedLayout('cuda', torch.float32, size=[512, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_47', layout=FixedLayout('cuda', torch.float32, size=[512], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[512, 512, 3, 3], stride=[4608, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_50', layout=FixedLayout('cuda', torch.float32, size=[512], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_52', layout=FixedLayout('cuda', torch.float32, size=[512, 256, 1, 1], stride=[256, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_53', layout=FixedLayout('cuda', torch.float32, size=[512], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_55', layout=FixedLayout('cuda', torch.float32, size=[512, 512, 3, 3], stride=[4608, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_56', layout=FixedLayout('cuda', torch.float32, size=[512], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_58', layout=FixedLayout('cuda', torch.float32, size=[512, 512, 3, 3], stride=[4608, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_59', layout=FixedLayout('cuda', torch.float32, size=[512], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_63', layout=FixedLayout('cuda', torch.float32, size=[s0, 3, s2, s2], stride=[3*s2**2, s2**2, s2, 1]))
)), TensorBox(StorageBox(
  Convolution(
    name=buf0,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 64, 16, 16]), stride=(16384, 256, 16, 1)),
    inputs=[InputBuffer(name='primals_63', layout=FixedLayout('cuda', torch.float32, size=[s0, 3, s2, s2], stride=[3*s2**2, s2**2, s2, 1])), InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[64, 3, 7, 7], stride=[147, 49, 7, 1]))],
    constant_args=(None, (2, 2), (3, 3), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={convolution, primals_1, primals_63}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf3, i1 + 32 * i0) / index_expr(512, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={view, var_mean, convolution, primals_1, primals_63}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={convolution, squeeze, view, var_mean, primals_1, primals_63}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={convolution, squeeze, view, var_mean, primals_1, primals_63, squeeze_1}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf2, i1 + 32 * i0) / index_expr(512, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={add, reciprocal, var_mean, primals_1, primals_63, view, convolution, sqrt}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={add, reciprocal, squeeze_2, var_mean, primals_1, primals_63, view, convolution, sqrt}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={add, reciprocal, squeeze_2, var_mean, primals_1, primals_63, view, squeeze_3, convolution, sqrt}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf4', layout=FixedLayout('cuda', torch.float32, size=(2, 64, 16, 16), stride=[16384, 256, 16, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf0, 16 * ModularIndexing(i3 + 16 * i2, 16, 16) + 256 * ModularIndexing(i1, 1, 2) + 512 * ModularIndexing(i1, 2, 32) + 16384 * i0 + ModularIndexing(i3, 1, 16)) - load(buf3, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(512, torch.float32) * reciprocal(sqrt(load(buf2, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(512, torch.float32) + constant(1e-05, torch.float32))) * load(primals_2, i1) + load(primals_3, i1)),
    ranges=(2, 64, 16, 16),
    origins={unsqueeze_3, reciprocal, primals_63, view_1, unsqueeze_1, convolution, mul_2, unsqueeze_4, mul_1, add, unsqueeze_2, unsqueeze_5, var_mean, primals_1, relu, unsqueeze, sub, primals_2, view, primals_3, sqrt, add_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf5', layout=FixedLayout('cuda', torch.float32, size=[2, 64, 8, 8], stride=[4096, 64, 8, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 17 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 16 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 15 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 1 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -1 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -15 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -16 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -17 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf))))))))),
    ranges=[2, 64, 8, 8],
    origins={unsqueeze_3, reciprocal, max_pool2d_with_indices, primals_63, view_1, unsqueeze_1, convolution, mul_2, unsqueeze_4, mul_1, add, unsqueeze_2, unsqueeze_5, var_mean, primals_1, relu, unsqueeze, sub, primals_2, view, primals_3, sqrt, add_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf6', layout=FlexibleLayout('cuda', torch.int64, size=[2, 64, 8, 8], stride=[4096, 64, 8, 1]), data=Pointwise(
    'cuda',
    torch.int64,
    where(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 17 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf) > maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 16 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 15 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 1 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -1 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -15 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -16 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -17 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf)))))))), index_expr(17 + 2 * i3 + 32 * i2, torch.int64), where(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 16 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf) > maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 15 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 1 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -1 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -15 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -16 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -17 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf))))))), index_expr(16 + 2 * i3 + 32 * i2, torch.int64), where(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 15 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf) > maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 1 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -1 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -15 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -16 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -17 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf)))))), index_expr(15 + 2 * i3 + 32 * i2, torch.int64), where(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 1 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf) > maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -1 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -15 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -16 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -17 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf))))), index_expr(1 + 2 * i3 + 32 * i2, torch.int64), where(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf) > maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -1 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -15 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -16 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -17 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf)))), index_expr(2 * i3 + 32 * i2, torch.int64), where(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -1 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf) > maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -15 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -16 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -17 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf))), index_expr(-1 + 2 * i3 + 32 * i2, torch.int64), where(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -15 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf) > maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -16 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -17 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf)), index_expr(-15 + 2 * i3 + 32 * i2, torch.int64), where(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -16 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf) > masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -17 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), index_expr(-16 + 2 * i3 + 32 * i2, torch.int64), index_expr(-17 + 2 * i3 + 32 * i2, torch.int64))))))))),
    ranges=[2, 64, 8, 8],
    origins={unsqueeze_3, reciprocal, max_pool2d_with_indices, primals_63, view_1, unsqueeze_1, convolution, mul_2, unsqueeze_4, mul_1, add, unsqueeze_2, unsqueeze_5, var_mean, primals_1, relu, unsqueeze, sub, primals_2, view, primals_3, sqrt, add_1}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf7,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 64, 8, 8]), stride=(4096, 64, 8, 1)),
    inputs=[ComputedBuffer(name='buf5', layout=FixedLayout('cuda', torch.float32, size=[2, 64, 8, 8], stride=[4096, 64, 8, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 17 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 16 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 15 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 1 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -1 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -15 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -16 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(16, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(16, torch.int64), load(buf4, -17 + 2 * i3 + 32 * i2 + 256 * i1 + 16384 * i0), -inf))))))))),
      ranges=[2, 64, 8, 8],
      origins={unsqueeze_3, reciprocal, max_pool2d_with_indices, primals_63, view_1, unsqueeze_1, convolution, mul_2, unsqueeze_4, mul_1, add, unsqueeze_2, unsqueeze_5, var_mean, primals_1, relu, unsqueeze, sub, primals_2, view, primals_3, sqrt, add_1}
    )), InputBuffer(name='primals_4', layout=FixedLayout('cuda', torch.float32, size=[64, 64, 3, 3], stride=[576, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={unsqueeze_3, reciprocal, max_pool2d_with_indices, primals_63, convolution_1, view_1, unsqueeze_1, convolution, mul_2, unsqueeze_4, mul_1, add, unsqueeze_2, unsqueeze_5, primals_4, primals_1, relu, unsqueeze, var_mean, sub, primals_2, view, primals_3, sqrt, add_1}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf10, i1 + 32 * i0) / index_expr(128, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={unsqueeze_3, reciprocal, max_pool2d_with_indices, primals_63, convolution_1, view_1, unsqueeze_1, convolution, mul_2, view_2, unsqueeze_4, mul_1, add, unsqueeze_2, unsqueeze_5, primals_4, primals_1, relu, unsqueeze, var_mean, sub, primals_2, view, var_mean_1, primals_3, sqrt, add_1}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={unsqueeze_3, reciprocal, max_pool2d_with_indices, primals_63, convolution_1, view_1, unsqueeze_1, convolution, mul_2, view_2, squeeze_4, unsqueeze_4, mul_1, add, unsqueeze_2, unsqueeze_5, primals_4, primals_1, relu, unsqueeze, var_mean, sub, primals_2, view, var_mean_1, primals_3, sqrt, add_1}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={unsqueeze_3, reciprocal, max_pool2d_with_indices, primals_63, convolution_1, view_1, unsqueeze_1, convolution, mul_2, view_2, squeeze_4, unsqueeze_4, mul_1, add, unsqueeze_2, unsqueeze_5, primals_4, primals_1, relu, squeeze_5, unsqueeze, var_mean, sub, primals_2, view, var_mean_1, primals_3, sqrt, add_1}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf9, i1 + 32 * i0) / index_expr(128, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={unsqueeze_3, reciprocal, max_pool2d_with_indices, reciprocal_1, sqrt_1, primals_63, convolution_1, view_1, add_2, unsqueeze_1, convolution, mul_2, view_2, unsqueeze_4, mul_1, add, unsqueeze_2, unsqueeze_5, primals_4, primals_1, relu, unsqueeze, var_mean, sub, primals_2, view, var_mean_1, primals_3, sqrt, add_1}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={unsqueeze_3, reciprocal, max_pool2d_with_indices, reciprocal_1, squeeze_6, sqrt_1, primals_63, convolution_1, view_1, add_2, unsqueeze_1, convolution, mul_2, view_2, unsqueeze_4, mul_1, add, unsqueeze_2, unsqueeze_5, primals_4, primals_1, relu, unsqueeze, var_mean, sub, primals_2, view, var_mean_1, primals_3, sqrt, add_1}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={reciprocal, max_pool2d_with_indices, reciprocal_1, squeeze_6, convolution_1, unsqueeze_1, convolution, mul_2, unsqueeze_4, mul_1, add, unsqueeze_2, primals_4, primals_1, relu, unsqueeze, sub, view, var_mean_1, add_1, unsqueeze_3, sqrt_1, primals_63, view_1, squeeze_7, view_2, unsqueeze_5, var_mean, primals_2, primals_3, sqrt, add_2}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf11', layout=FixedLayout('cuda', torch.float32, size=(2, 64, 8, 8), stride=[4096, 64, 8, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf7, 8 * ModularIndexing(i3 + 8 * i2, 8, 8) + 64 * ModularIndexing(i1, 1, 2) + 128 * ModularIndexing(i1, 2, 32) + 4096 * i0 + ModularIndexing(i3, 1, 8)) - load(buf10, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) * reciprocal(sqrt(load(buf9, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) + constant(1e-05, torch.float32))) * load(primals_5, i1) + load(primals_6, i1)),
    ranges=(2, 64, 8, 8),
    origins={reciprocal, max_pool2d_with_indices, reciprocal_1, convolution_1, unsqueeze_1, convolution, mul_2, unsqueeze_4, mul_1, add, unsqueeze_2, primals_4, primals_1, view_3, relu, unsqueeze, sub, mul_4, view, unsqueeze_10, var_mean_1, add_1, unsqueeze_3, primals_5, unsqueeze_9, sqrt_1, primals_63, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, var_mean, mul_5, primals_2, relu_1, primals_3, sqrt, add_2}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf12,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 64, 8, 8]), stride=(4096, 64, 8, 1)),
    inputs=[ComputedBuffer(name='buf11', layout=FixedLayout('cuda', torch.float32, size=(2, 64, 8, 8), stride=[4096, 64, 8, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf7, 8 * ModularIndexing(i3 + 8 * i2, 8, 8) + 64 * ModularIndexing(i1, 1, 2) + 128 * ModularIndexing(i1, 2, 32) + 4096 * i0 + ModularIndexing(i3, 1, 8)) - load(buf10, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) * reciprocal(sqrt(load(buf9, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) + constant(1e-05, torch.float32))) * load(primals_5, i1) + load(primals_6, i1)),
      ranges=(2, 64, 8, 8),
      origins={reciprocal, max_pool2d_with_indices, reciprocal_1, convolution_1, unsqueeze_1, convolution, mul_2, unsqueeze_4, mul_1, add, unsqueeze_2, primals_4, primals_1, view_3, relu, unsqueeze, sub, mul_4, view, unsqueeze_10, var_mean_1, add_1, unsqueeze_3, primals_5, unsqueeze_9, sqrt_1, primals_63, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, var_mean, mul_5, primals_2, relu_1, primals_3, sqrt, add_2}
    )), InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[64, 64, 3, 3], stride=[576, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={reciprocal, max_pool2d_with_indices, reciprocal_1, convolution_1, primals_7, unsqueeze_1, convolution, mul_2, unsqueeze_4, mul_1, add, unsqueeze_2, primals_4, primals_1, view_3, relu, unsqueeze, sub, mul_4, view, unsqueeze_10, var_mean_1, add_1, unsqueeze_3, primals_5, unsqueeze_9, convolution_2, sqrt_1, primals_63, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, var_mean, mul_5, primals_2, relu_1, primals_3, sqrt, add_2}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf15, i1 + 32 * i0) / index_expr(128, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={reciprocal, max_pool2d_with_indices, reciprocal_1, convolution_1, primals_7, unsqueeze_1, convolution, mul_2, unsqueeze_4, mul_1, add, unsqueeze_2, view_4, primals_4, primals_1, view_3, relu, unsqueeze, sub, mul_4, view, unsqueeze_10, var_mean_1, add_1, unsqueeze_3, primals_5, unsqueeze_9, convolution_2, sqrt_1, primals_63, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, var_mean, var_mean_2, mul_5, primals_2, relu_1, primals_3, sqrt, add_2}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={reciprocal, max_pool2d_with_indices, reciprocal_1, convolution_1, primals_7, unsqueeze_1, convolution, mul_2, squeeze_8, unsqueeze_4, mul_1, add, unsqueeze_2, view_4, primals_4, primals_1, view_3, relu, unsqueeze, sub, mul_4, view, unsqueeze_10, var_mean_1, add_1, unsqueeze_3, primals_5, unsqueeze_9, convolution_2, sqrt_1, primals_63, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, var_mean, var_mean_2, mul_5, primals_2, relu_1, primals_3, sqrt, add_2}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={reciprocal, max_pool2d_with_indices, reciprocal_1, convolution_1, primals_7, unsqueeze_1, convolution, mul_2, squeeze_8, unsqueeze_4, mul_1, add, unsqueeze_2, view_4, primals_4, primals_1, view_3, relu, unsqueeze, sub, mul_4, view, unsqueeze_10, var_mean_1, add_1, unsqueeze_3, primals_5, unsqueeze_9, convolution_2, sqrt_1, primals_63, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, var_mean, squeeze_9, var_mean_2, mul_5, primals_2, relu_1, primals_3, sqrt, add_2}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf14, i1 + 32 * i0) / index_expr(128, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={reciprocal, max_pool2d_with_indices, reciprocal_1, convolution_1, add_4, reciprocal_2, primals_7, unsqueeze_1, convolution, mul_2, unsqueeze_4, mul_1, add, sqrt_2, unsqueeze_2, view_4, primals_4, primals_1, view_3, relu, unsqueeze, sub, mul_4, view, unsqueeze_10, var_mean_1, add_1, unsqueeze_3, primals_5, unsqueeze_9, convolution_2, sqrt_1, primals_63, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, var_mean, var_mean_2, mul_5, primals_2, relu_1, primals_3, sqrt, add_2}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={reciprocal, max_pool2d_with_indices, reciprocal_1, convolution_1, add_4, reciprocal_2, primals_7, unsqueeze_1, convolution, mul_2, unsqueeze_4, mul_1, add, sqrt_2, unsqueeze_2, squeeze_10, view_4, primals_4, primals_1, view_3, relu, unsqueeze, sub, mul_4, view, unsqueeze_10, var_mean_1, add_1, unsqueeze_3, primals_5, unsqueeze_9, convolution_2, sqrt_1, primals_63, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, var_mean, var_mean_2, mul_5, primals_2, relu_1, primals_3, sqrt, add_2}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={reciprocal, max_pool2d_with_indices, reciprocal_1, convolution_1, add_4, reciprocal_2, primals_7, unsqueeze_1, convolution, mul_2, unsqueeze_4, mul_1, add, sqrt_2, unsqueeze_2, squeeze_10, view_4, primals_4, primals_1, view_3, relu, unsqueeze, sub, mul_4, view, unsqueeze_10, var_mean_1, add_1, unsqueeze_3, primals_5, unsqueeze_9, convolution_2, sqrt_1, primals_63, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, squeeze_11, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, var_mean, var_mean_2, mul_5, primals_2, relu_1, primals_3, sqrt, add_2}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf16', layout=FixedLayout('cuda', torch.float32, size=(2, 64, 8, 8), stride=[4096, 64, 8, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf12, 8 * ModularIndexing(i3 + 8 * i2, 8, 8) + 64 * ModularIndexing(i1, 1, 2) + 128 * ModularIndexing(i1, 2, 32) + 4096 * i0 + ModularIndexing(i3, 1, 8)) - load(buf15, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) * reciprocal(sqrt(load(buf14, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) + constant(1e-05, torch.float32))) * load(primals_8, i1) + load(primals_9, i1) + load(buf5, i3 + 8 * i2 + 64 * i1 + 4096 * i0)),
    ranges=(2, 64, 8, 8),
    origins={reciprocal, max_pool2d_with_indices, add_5, convolution_1, primals_7, convolution, mul_2, add, primals_1, relu, unsqueeze, unsqueeze_17, unsqueeze_3, unsqueeze_9, primals_63, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_3, sqrt, reciprocal_1, add_4, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_4, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, sub, unsqueeze_14, mul_8, mul_4, view, unsqueeze_10, var_mean_1, add_1, primals_5, view_5, convolution_2, add_6, sqrt_1, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf17,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 64, 8, 8]), stride=(4096, 64, 8, 1)),
    inputs=[ComputedBuffer(name='buf16', layout=FixedLayout('cuda', torch.float32, size=(2, 64, 8, 8), stride=[4096, 64, 8, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf12, 8 * ModularIndexing(i3 + 8 * i2, 8, 8) + 64 * ModularIndexing(i1, 1, 2) + 128 * ModularIndexing(i1, 2, 32) + 4096 * i0 + ModularIndexing(i3, 1, 8)) - load(buf15, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) * reciprocal(sqrt(load(buf14, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) + constant(1e-05, torch.float32))) * load(primals_8, i1) + load(primals_9, i1) + load(buf5, i3 + 8 * i2 + 64 * i1 + 4096 * i0)),
      ranges=(2, 64, 8, 8),
      origins={reciprocal, max_pool2d_with_indices, add_5, convolution_1, primals_7, convolution, mul_2, add, primals_1, relu, unsqueeze, unsqueeze_17, unsqueeze_3, unsqueeze_9, primals_63, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_3, sqrt, reciprocal_1, add_4, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_4, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, sub, unsqueeze_14, mul_8, mul_4, view, unsqueeze_10, var_mean_1, add_1, primals_5, view_5, convolution_2, add_6, sqrt_1, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2}
    )), InputBuffer(name='primals_10', layout=FixedLayout('cuda', torch.float32, size=[64, 64, 3, 3], stride=[576, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={reciprocal, max_pool2d_with_indices, convolution_1, add_5, primals_7, convolution, mul_2, add, primals_1, relu, unsqueeze, unsqueeze_17, unsqueeze_3, unsqueeze_9, primals_63, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_1, add_4, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_4, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, sub, unsqueeze_14, mul_8, mul_4, view, unsqueeze_10, var_mean_1, add_1, primals_5, view_5, convolution_3, convolution_2, add_6, sqrt_1, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf20, i1 + 32 * i0) / index_expr(128, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={reciprocal, max_pool2d_with_indices, convolution_1, add_5, primals_7, convolution, mul_2, add, primals_1, relu, unsqueeze, unsqueeze_17, unsqueeze_3, unsqueeze_9, var_mean_3, primals_63, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, view_6, reciprocal_1, add_4, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_4, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, sub, unsqueeze_14, mul_8, mul_4, view, unsqueeze_10, var_mean_1, add_1, primals_5, view_5, convolution_3, convolution_2, add_6, sqrt_1, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={reciprocal, max_pool2d_with_indices, add_5, convolution_1, primals_7, convolution, mul_2, add, primals_1, relu, unsqueeze, squeeze_12, unsqueeze_17, unsqueeze_3, unsqueeze_9, var_mean_3, primals_63, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, view_6, reciprocal_1, add_4, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_4, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, sub, unsqueeze_14, mul_8, mul_4, view, unsqueeze_10, var_mean_1, add_1, primals_5, view_5, convolution_3, convolution_2, add_6, sqrt_1, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={reciprocal, max_pool2d_with_indices, convolution_1, add_5, primals_7, convolution, mul_2, add, primals_1, relu, unsqueeze, squeeze_12, unsqueeze_17, unsqueeze_3, unsqueeze_9, var_mean_3, primals_63, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, view_6, reciprocal_1, add_4, squeeze_13, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_4, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, sub, unsqueeze_14, mul_8, mul_4, view, unsqueeze_10, var_mean_1, add_1, primals_5, view_5, convolution_3, convolution_2, add_6, sqrt_1, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf19, i1 + 32 * i0) / index_expr(128, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={reciprocal, max_pool2d_with_indices, add_5, convolution_1, primals_7, convolution, mul_2, add, primals_1, relu, unsqueeze, unsqueeze_17, unsqueeze_3, unsqueeze_9, add_7, var_mean_3, primals_63, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, view_6, reciprocal_1, add_4, sqrt_3, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_4, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, sub, unsqueeze_14, mul_8, mul_4, view, unsqueeze_10, var_mean_1, add_1, primals_5, view_5, convolution_3, convolution_2, add_6, sqrt_1, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={reciprocal, max_pool2d_with_indices, convolution_1, add_5, primals_7, convolution, mul_2, add, primals_1, relu, unsqueeze, unsqueeze_17, unsqueeze_3, unsqueeze_9, add_7, var_mean_3, primals_63, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, view_6, reciprocal_1, squeeze_14, add_4, sqrt_3, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_4, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, sub, unsqueeze_14, mul_8, mul_4, view, unsqueeze_10, var_mean_1, add_1, primals_5, view_5, convolution_3, convolution_2, add_6, sqrt_1, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={reciprocal, max_pool2d_with_indices, add_5, convolution_1, primals_7, convolution, mul_2, add, primals_1, relu, unsqueeze, unsqueeze_17, unsqueeze_3, unsqueeze_9, add_7, var_mean_3, primals_63, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, view_6, reciprocal_1, squeeze_14, add_4, sqrt_3, reciprocal_2, unsqueeze_1, relu_2, squeeze_15, unsqueeze_4, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, sub, unsqueeze_14, mul_8, mul_4, view, unsqueeze_10, var_mean_1, add_1, primals_5, view_5, convolution_3, convolution_2, add_6, sqrt_1, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf21', layout=FixedLayout('cuda', torch.float32, size=(2, 64, 8, 8), stride=[4096, 64, 8, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf17, 8 * ModularIndexing(i3 + 8 * i2, 8, 8) + 64 * ModularIndexing(i1, 1, 2) + 128 * ModularIndexing(i1, 2, 32) + 4096 * i0 + ModularIndexing(i3, 1, 8)) - load(buf20, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) * reciprocal(sqrt(load(buf19, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) + constant(1e-05, torch.float32))) * load(primals_11, i1) + load(primals_12, i1)),
    ranges=(2, 64, 8, 8),
    origins={reciprocal, max_pool2d_with_indices, add_5, convolution_1, primals_7, convolution, mul_2, add, primals_1, mul_10, relu, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, sqrt_3, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, relu_3, unsqueeze_20, add_1, unsqueeze_19, primals_5, view_5, convolution_3, convolution_2, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, primals_12}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf22,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 64, 8, 8]), stride=(4096, 64, 8, 1)),
    inputs=[ComputedBuffer(name='buf21', layout=FixedLayout('cuda', torch.float32, size=(2, 64, 8, 8), stride=[4096, 64, 8, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf17, 8 * ModularIndexing(i3 + 8 * i2, 8, 8) + 64 * ModularIndexing(i1, 1, 2) + 128 * ModularIndexing(i1, 2, 32) + 4096 * i0 + ModularIndexing(i3, 1, 8)) - load(buf20, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) * reciprocal(sqrt(load(buf19, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) + constant(1e-05, torch.float32))) * load(primals_11, i1) + load(primals_12, i1)),
      ranges=(2, 64, 8, 8),
      origins={reciprocal, max_pool2d_with_indices, add_5, convolution_1, primals_7, convolution, mul_2, add, primals_1, mul_10, relu, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, sqrt_3, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, relu_3, unsqueeze_20, add_1, unsqueeze_19, primals_5, view_5, convolution_3, convolution_2, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, primals_12}
    )), InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[64, 64, 3, 3], stride=[576, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={reciprocal, max_pool2d_with_indices, convolution_1, add_5, primals_7, convolution, mul_2, add, convolution_4, primals_1, mul_10, relu, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, sqrt_3, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_19, primals_5, view_5, convolution_3, convolution_2, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, add_3, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, primals_12}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf25, i1 + 32 * i0) / index_expr(128, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={reciprocal, max_pool2d_with_indices, convolution_1, add_5, primals_7, convolution, mul_2, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, sqrt_3, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, var_mean_4, sub_2, view_3, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_19, primals_5, view_5, convolution_3, convolution_2, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, view_8, add_3, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, primals_12}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={reciprocal, max_pool2d_with_indices, add_5, convolution_1, primals_7, convolution, mul_2, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, unsqueeze_18, view_6, reciprocal_1, squeeze_16, sub_3, add_4, sqrt_3, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, var_mean_4, sub_2, view_3, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_19, primals_5, view_5, convolution_3, convolution_2, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, view_8, add_3, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, primals_12}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={reciprocal, max_pool2d_with_indices, convolution_1, add_5, primals_7, convolution, mul_2, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean, squeeze_17, primals_2, primals_10, primals_3, sqrt, reciprocal_3, unsqueeze_18, view_6, reciprocal_1, squeeze_16, sub_3, add_4, sqrt_3, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, var_mean_4, sub_2, view_3, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_19, primals_5, view_5, convolution_3, convolution_2, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, view_8, add_3, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, primals_12}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf24, i1 + 32 * i0) / index_expr(128, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={reciprocal, max_pool2d_with_indices, add_9, convolution_1, add_5, primals_7, convolution, mul_2, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, sqrt_3, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, reciprocal_4, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, var_mean_4, sub_2, view_3, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_19, primals_5, view_5, convolution_3, convolution_2, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, view_8, add_3, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, primals_12}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={reciprocal, max_pool2d_with_indices, add_9, convolution_1, add_5, primals_7, convolution, mul_2, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, sqrt_3, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, reciprocal_4, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, var_mean_4, sub_2, view_3, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_19, primals_5, squeeze_18, view_5, convolution_3, convolution_2, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, view_8, add_3, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, primals_12}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={reciprocal, max_pool2d_with_indices, add_9, convolution_1, add_5, primals_7, convolution, mul_2, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, squeeze_19, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, sqrt_3, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, reciprocal_4, mul_1, sqrt_2, unsqueeze_2, view_4, primals_4, var_mean_4, sub_2, view_3, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_19, primals_5, squeeze_18, view_5, convolution_3, convolution_2, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, view_2, unsqueeze_11, unsqueeze_8, view_8, add_3, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, primals_12}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf26', layout=FixedLayout('cuda', torch.float32, size=(2, 64, 8, 8), stride=[4096, 64, 8, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf22, 8 * ModularIndexing(i3 + 8 * i2, 8, 8) + 64 * ModularIndexing(i1, 1, 2) + 128 * ModularIndexing(i1, 2, 32) + 4096 * i0 + ModularIndexing(i3, 1, 8)) - load(buf25, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) * reciprocal(sqrt(load(buf24, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) + constant(1e-05, torch.float32))) * load(primals_14, i1) + load(primals_15, i1) + load(buf16, i3 + 8 * i2 + 64 * i1 + 4096 * i0)),
    ranges=(2, 64, 8, 8),
    origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, primals_12, primals_14, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sub_4, primals_5, view_5, convolution_3, unsqueeze_28, convolution_2, unsqueeze_24, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf27,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 128, 4, 4]), stride=(2048, 16, 4, 1)),
    inputs=[ComputedBuffer(name='buf26', layout=FixedLayout('cuda', torch.float32, size=(2, 64, 8, 8), stride=[4096, 64, 8, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf22, 8 * ModularIndexing(i3 + 8 * i2, 8, 8) + 64 * ModularIndexing(i1, 1, 2) + 128 * ModularIndexing(i1, 2, 32) + 4096 * i0 + ModularIndexing(i3, 1, 8)) - load(buf25, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) * reciprocal(sqrt(load(buf24, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) + constant(1e-05, torch.float32))) * load(primals_14, i1) + load(primals_15, i1) + load(buf16, i3 + 8 * i2 + 64 * i1 + 4096 * i0)),
      ranges=(2, 64, 8, 8),
      origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, primals_12, primals_14, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sub_4, primals_5, view_5, convolution_3, unsqueeze_28, convolution_2, unsqueeze_24, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
    )), InputBuffer(name='primals_16', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 3, 3], stride=[576, 9, 3, 1]))],
    constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, convolution_5, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, primals_12, primals_14, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sub_4, primals_5, view_5, convolution_3, unsqueeze_28, primals_16, convolution_2, unsqueeze_24, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf30, i1 + 32 * i0) / index_expr(64, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_10, var_mean_5, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, convolution_5, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, primals_12, primals_14, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sub_4, primals_5, view_5, convolution_3, unsqueeze_28, primals_16, convolution_2, unsqueeze_24, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_10, var_mean_5, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, convolution_5, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, primals_12, primals_14, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sub_4, primals_5, view_5, convolution_3, unsqueeze_28, primals_16, convolution_2, unsqueeze_24, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, squeeze_20, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_10, var_mean_5, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, convolution_5, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, primals_12, primals_14, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sub_4, primals_5, view_5, convolution_3, unsqueeze_28, primals_16, convolution_2, unsqueeze_24, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, squeeze_21, add_3, primals_15, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, squeeze_20, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf29, i1 + 32 * i0) / index_expr(64, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_10, var_mean_5, reciprocal_5, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, convolution_5, var_mean, primals_2, primals_10, sqrt_5, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, primals_12, primals_14, add_12, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sub_4, primals_5, view_5, convolution_3, unsqueeze_28, primals_16, convolution_2, unsqueeze_24, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_10, var_mean_5, reciprocal_5, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, convolution_5, var_mean, primals_2, primals_10, sqrt_5, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, primals_12, primals_14, add_12, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sub_4, primals_5, view_5, convolution_3, unsqueeze_28, primals_16, convolution_2, unsqueeze_24, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, squeeze_22, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_10, var_mean_5, reciprocal_5, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, convolution_5, var_mean, primals_2, primals_10, sqrt_5, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, primals_12, primals_14, add_12, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sub_4, primals_5, view_5, convolution_3, unsqueeze_28, primals_16, convolution_2, unsqueeze_24, add_6, squeeze_23, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, unsqueeze_23, unsqueeze_15, squeeze_22, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf31', layout=FixedLayout('cuda', torch.float32, size=(2, 128, 4, 4), stride=[2048, 16, 4, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf27, 4 * ModularIndexing(i3 + 4 * i2, 4, 4) + 16 * ModularIndexing(i1, 1, 4) + 64 * ModularIndexing(i1, 4, 32) + 2048 * i0 + ModularIndexing(i3, 1, 4)) - load(buf30, 32 * i0 + ModularIndexing(i1, 4, 32)) / index_expr(64, torch.float32) * reciprocal(sqrt(load(buf29, 32 * i0 + ModularIndexing(i1, 4, 32)) / index_expr(64, torch.float32) + constant(1e-05, torch.float32))) * load(primals_17, i1) + load(primals_18, i1)),
    ranges=(2, 128, 4, 4),
    origins={max_pool2d_with_indices, add_9, add_11, unsqueeze_33, primals_7, view_9, add, primals_1, mul_10, relu, unsqueeze_17, view_10, var_mean_5, view_7, primals_11, reciprocal_5, add_7, primals_63, var_mean_3, unsqueeze_16, primals_9, convolution_5, var_mean, primals_2, primals_10, unsqueeze_34, primals_3, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, add_12, sqrt_3, unsqueeze_29, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_32, view_4, primals_4, sub_2, sub, unsqueeze_14, mul_4, view, unsqueeze_10, mul_17, unsqueeze_20, add_1, unsqueeze_26, primals_5, convolution_2, add_8, primals_6, view_1, unsqueeze_11, sub_5, add_3, unsqueeze_23, unsqueeze_15, var_mean_2, add_2, add_13, reciprocal, convolution_1, add_5, view_11, convolution, mul_2, convolution_4, unsqueeze, mul_11, unsqueeze_31, primals_18, unsqueeze_3, unsqueeze_9, primals_17, primals_8, sqrt_5, sqrt, primals_12, primals_14, mul_14, reciprocal_2, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, unsqueeze_30, view_3, var_mean_4, sqrt_4, mul_8, var_mean_1, relu_5, primals_13, relu_3, unsqueeze_19, sub_4, view_5, convolution_3, unsqueeze_28, primals_16, unsqueeze_24, add_6, sqrt_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_35, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_8, view_8, primals_15, mul_16, unsqueeze_5, unsqueeze_12, mul_5, relu_1, unsqueeze_13, mul_7, relu_4}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf32,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 128, 4, 4]), stride=(2048, 16, 4, 1)),
    inputs=[ComputedBuffer(name='buf31', layout=FixedLayout('cuda', torch.float32, size=(2, 128, 4, 4), stride=[2048, 16, 4, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf27, 4 * ModularIndexing(i3 + 4 * i2, 4, 4) + 16 * ModularIndexing(i1, 1, 4) + 64 * ModularIndexing(i1, 4, 32) + 2048 * i0 + ModularIndexing(i3, 1, 4)) - load(buf30, 32 * i0 + ModularIndexing(i1, 4, 32)) / index_expr(64, torch.float32) * reciprocal(sqrt(load(buf29, 32 * i0 + ModularIndexing(i1, 4, 32)) / index_expr(64, torch.float32) + constant(1e-05, torch.float32))) * load(primals_17, i1) + load(primals_18, i1)),
      ranges=(2, 128, 4, 4),
      origins={max_pool2d_with_indices, add_9, add_11, unsqueeze_33, primals_7, view_9, add, primals_1, mul_10, relu, unsqueeze_17, view_10, var_mean_5, view_7, primals_11, reciprocal_5, add_7, primals_63, var_mean_3, unsqueeze_16, primals_9, convolution_5, var_mean, primals_2, primals_10, unsqueeze_34, primals_3, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, add_12, sqrt_3, unsqueeze_29, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_32, view_4, primals_4, sub_2, sub, unsqueeze_14, mul_4, view, unsqueeze_10, mul_17, unsqueeze_20, add_1, unsqueeze_26, primals_5, convolution_2, add_8, primals_6, view_1, unsqueeze_11, sub_5, add_3, unsqueeze_23, unsqueeze_15, var_mean_2, add_2, add_13, reciprocal, convolution_1, add_5, view_11, convolution, mul_2, convolution_4, unsqueeze, mul_11, unsqueeze_31, primals_18, unsqueeze_3, unsqueeze_9, primals_17, primals_8, sqrt_5, sqrt, primals_12, primals_14, mul_14, reciprocal_2, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, unsqueeze_30, view_3, var_mean_4, sqrt_4, mul_8, var_mean_1, relu_5, primals_13, relu_3, unsqueeze_19, sub_4, view_5, convolution_3, unsqueeze_28, primals_16, unsqueeze_24, add_6, sqrt_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_35, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_8, view_8, primals_15, mul_16, unsqueeze_5, unsqueeze_12, mul_5, relu_1, unsqueeze_13, mul_7, relu_4}
    )), InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[128, 128, 3, 3], stride=[1152, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={max_pool2d_with_indices, add_9, add_11, unsqueeze_33, primals_7, view_9, add, primals_1, mul_10, relu, unsqueeze_17, view_10, var_mean_5, view_7, primals_11, reciprocal_5, add_7, primals_63, var_mean_3, unsqueeze_16, primals_9, convolution_5, var_mean, primals_2, primals_10, unsqueeze_34, primals_3, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, add_12, sqrt_3, unsqueeze_29, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_32, view_4, primals_4, sub_2, sub, unsqueeze_14, mul_4, view, unsqueeze_10, mul_17, unsqueeze_20, add_1, unsqueeze_26, primals_5, convolution_2, add_8, primals_6, view_1, unsqueeze_11, sub_5, add_3, unsqueeze_23, unsqueeze_15, var_mean_2, add_13, add_2, reciprocal, convolution_1, add_5, view_11, convolution, mul_2, convolution_4, unsqueeze, mul_11, unsqueeze_31, primals_18, unsqueeze_3, unsqueeze_9, primals_17, primals_8, sqrt_5, sqrt, primals_12, primals_14, mul_14, reciprocal_2, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, unsqueeze_30, view_3, var_mean_4, sqrt_4, primals_19, mul_8, var_mean_1, relu_5, primals_13, relu_3, unsqueeze_19, sub_4, view_5, convolution_3, unsqueeze_28, primals_16, unsqueeze_24, add_6, sqrt_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_35, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_8, convolution_6, view_8, primals_15, mul_16, unsqueeze_5, unsqueeze_12, mul_5, relu_1, unsqueeze_13, mul_7, relu_4}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf35, i1 + 32 * i0) / index_expr(64, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={max_pool2d_with_indices, add_9, add_11, unsqueeze_33, primals_7, view_9, add, primals_1, mul_10, relu, unsqueeze_17, view_10, var_mean_5, view_7, primals_11, reciprocal_5, add_7, primals_63, var_mean_3, unsqueeze_16, primals_9, convolution_5, var_mean, primals_2, primals_10, unsqueeze_34, primals_3, reciprocal_3, var_mean_6, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, add_12, sqrt_3, unsqueeze_29, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_32, view_4, primals_4, sub_2, sub, unsqueeze_14, mul_4, view, unsqueeze_10, mul_17, unsqueeze_20, add_1, unsqueeze_26, primals_5, convolution_2, add_8, primals_6, view_1, unsqueeze_11, sub_5, add_3, unsqueeze_23, unsqueeze_15, var_mean_2, add_13, add_2, reciprocal, convolution_1, add_5, view_11, convolution, mul_2, convolution_4, unsqueeze, mul_11, unsqueeze_31, primals_18, unsqueeze_3, unsqueeze_9, primals_17, primals_8, sqrt_5, sqrt, primals_12, primals_14, mul_14, reciprocal_2, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, unsqueeze_30, view_3, var_mean_4, sqrt_4, primals_19, view_12, mul_8, var_mean_1, relu_5, primals_13, relu_3, unsqueeze_19, sub_4, view_5, convolution_3, unsqueeze_28, primals_16, unsqueeze_24, add_6, sqrt_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_35, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_8, convolution_6, view_8, primals_15, mul_16, unsqueeze_5, unsqueeze_12, mul_5, relu_1, unsqueeze_13, mul_7, relu_4}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={max_pool2d_with_indices, add_9, add_11, unsqueeze_33, primals_7, view_9, add, primals_1, mul_10, relu, unsqueeze_17, view_10, var_mean_5, view_7, primals_11, reciprocal_5, add_7, primals_63, var_mean_3, unsqueeze_16, primals_9, convolution_5, var_mean, primals_2, primals_10, unsqueeze_34, primals_3, reciprocal_3, var_mean_6, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, add_12, sqrt_3, unsqueeze_29, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_32, view_4, primals_4, sub_2, sub, unsqueeze_14, mul_4, view, unsqueeze_10, mul_17, unsqueeze_20, add_1, unsqueeze_26, primals_5, convolution_2, add_8, primals_6, view_1, unsqueeze_11, sub_5, add_3, squeeze_24, unsqueeze_23, unsqueeze_15, var_mean_2, add_2, add_13, reciprocal, convolution_1, add_5, view_11, convolution, mul_2, convolution_4, unsqueeze, mul_11, unsqueeze_31, primals_18, unsqueeze_3, unsqueeze_9, primals_17, primals_8, sqrt_5, sqrt, primals_12, primals_14, mul_14, reciprocal_2, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, unsqueeze_30, view_3, var_mean_4, sqrt_4, primals_19, view_12, mul_8, var_mean_1, relu_5, primals_13, relu_3, unsqueeze_19, sub_4, view_5, convolution_3, unsqueeze_28, primals_16, unsqueeze_24, add_6, sqrt_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_35, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_8, convolution_6, view_8, primals_15, mul_16, unsqueeze_5, unsqueeze_12, mul_5, relu_1, unsqueeze_13, mul_7, relu_4}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={max_pool2d_with_indices, add_9, add_11, unsqueeze_33, primals_7, view_9, add, primals_1, mul_10, relu, unsqueeze_17, view_10, var_mean_5, view_7, primals_11, reciprocal_5, add_7, primals_63, var_mean_3, unsqueeze_16, primals_9, convolution_5, var_mean, primals_2, primals_10, unsqueeze_34, primals_3, reciprocal_3, var_mean_6, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, add_12, sqrt_3, unsqueeze_29, unsqueeze_1, relu_2, unsqueeze_21, squeeze_25, unsqueeze_32, view_4, primals_4, sub_2, sub, unsqueeze_14, mul_4, view, unsqueeze_10, mul_17, unsqueeze_20, add_1, unsqueeze_26, primals_5, convolution_2, add_8, primals_6, view_1, unsqueeze_11, sub_5, add_3, squeeze_24, unsqueeze_23, unsqueeze_15, var_mean_2, add_13, add_2, reciprocal, convolution_1, add_5, view_11, convolution, mul_2, convolution_4, unsqueeze, mul_11, unsqueeze_31, primals_18, unsqueeze_3, unsqueeze_9, primals_17, primals_8, sqrt_5, sqrt, primals_12, primals_14, mul_14, reciprocal_2, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, unsqueeze_30, view_3, var_mean_4, sqrt_4, primals_19, view_12, mul_8, var_mean_1, relu_5, primals_13, relu_3, unsqueeze_19, sub_4, view_5, convolution_3, unsqueeze_28, primals_16, unsqueeze_24, add_6, sqrt_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_35, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_8, convolution_6, view_8, primals_15, mul_16, unsqueeze_5, unsqueeze_12, mul_5, relu_1, unsqueeze_13, mul_7, relu_4}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf34, i1 + 32 * i0) / index_expr(64, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={max_pool2d_with_indices, add_9, add_11, unsqueeze_33, primals_7, view_9, add, primals_1, mul_10, relu, unsqueeze_17, view_10, var_mean_5, view_7, primals_11, reciprocal_5, add_7, primals_63, var_mean_3, unsqueeze_16, primals_9, convolution_5, var_mean, primals_2, primals_10, unsqueeze_34, primals_3, reciprocal_3, var_mean_6, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, add_12, sqrt_3, unsqueeze_29, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_32, view_4, primals_4, sub_2, sub, unsqueeze_14, mul_4, view, unsqueeze_10, mul_17, unsqueeze_20, add_1, unsqueeze_26, primals_5, convolution_2, add_8, primals_6, view_1, unsqueeze_11, sub_5, add_3, unsqueeze_23, unsqueeze_15, var_mean_2, add_2, add_13, reciprocal, convolution_1, add_5, view_11, convolution, mul_2, convolution_4, unsqueeze, mul_11, unsqueeze_31, primals_18, sqrt_6, unsqueeze_3, unsqueeze_9, primals_17, primals_8, sqrt_5, add_14, sqrt, primals_12, primals_14, mul_14, reciprocal_2, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, unsqueeze_30, reciprocal_6, view_3, var_mean_4, sqrt_4, primals_19, view_12, mul_8, var_mean_1, relu_5, primals_13, relu_3, unsqueeze_19, sub_4, view_5, convolution_3, unsqueeze_28, primals_16, unsqueeze_24, add_6, sqrt_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_35, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_8, convolution_6, view_8, primals_15, mul_16, unsqueeze_5, unsqueeze_12, mul_5, relu_1, unsqueeze_13, mul_7, relu_4}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={max_pool2d_with_indices, add_9, add_11, unsqueeze_33, primals_7, view_9, add, primals_1, mul_10, relu, unsqueeze_17, view_10, var_mean_5, view_7, primals_11, reciprocal_5, add_7, primals_63, var_mean_3, unsqueeze_16, primals_9, convolution_5, var_mean, primals_2, primals_10, unsqueeze_34, primals_3, reciprocal_3, var_mean_6, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, add_12, sqrt_3, unsqueeze_29, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_32, view_4, primals_4, sub_2, sub, unsqueeze_14, mul_4, view, unsqueeze_10, mul_17, unsqueeze_20, add_1, unsqueeze_26, primals_5, convolution_2, add_8, primals_6, view_1, unsqueeze_11, sub_5, add_3, unsqueeze_23, unsqueeze_15, var_mean_2, add_13, add_2, reciprocal, convolution_1, add_5, view_11, convolution, mul_2, convolution_4, unsqueeze, mul_11, unsqueeze_31, primals_18, sqrt_6, unsqueeze_3, unsqueeze_9, primals_17, primals_8, sqrt_5, add_14, sqrt, primals_12, primals_14, mul_14, reciprocal_2, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, unsqueeze_30, reciprocal_6, view_3, var_mean_4, sqrt_4, primals_19, view_12, mul_8, var_mean_1, relu_5, primals_13, relu_3, unsqueeze_19, sub_4, view_5, convolution_3, unsqueeze_28, primals_16, unsqueeze_24, add_6, sqrt_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_35, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_8, convolution_6, view_8, primals_15, mul_16, unsqueeze_5, unsqueeze_12, squeeze_26, mul_5, relu_1, unsqueeze_13, mul_7, relu_4}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={max_pool2d_with_indices, add_9, add_11, unsqueeze_33, primals_7, view_9, add, primals_1, mul_10, relu, unsqueeze_17, view_10, var_mean_5, view_7, primals_11, reciprocal_5, add_7, primals_63, var_mean_3, unsqueeze_16, primals_9, convolution_5, var_mean, primals_2, primals_10, unsqueeze_34, primals_3, reciprocal_3, var_mean_6, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, add_4, add_12, sqrt_3, unsqueeze_29, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_32, view_4, primals_4, sub_2, sub, unsqueeze_14, mul_4, view, unsqueeze_10, mul_17, unsqueeze_20, add_1, unsqueeze_26, primals_5, convolution_2, add_8, primals_6, view_1, unsqueeze_11, sub_5, add_3, unsqueeze_23, unsqueeze_15, var_mean_2, add_2, add_13, reciprocal, convolution_1, add_5, view_11, convolution, mul_2, convolution_4, unsqueeze, mul_11, unsqueeze_31, primals_18, sqrt_6, unsqueeze_3, unsqueeze_9, primals_17, primals_8, sqrt_5, add_14, sqrt, primals_12, primals_14, mul_14, reciprocal_2, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, unsqueeze_30, reciprocal_6, view_3, var_mean_4, sqrt_4, primals_19, view_12, mul_8, var_mean_1, relu_5, primals_13, relu_3, unsqueeze_19, sub_4, view_5, convolution_3, unsqueeze_28, primals_16, unsqueeze_24, add_6, sqrt_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_35, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_8, convolution_6, view_8, squeeze_27, primals_15, mul_16, unsqueeze_5, unsqueeze_12, squeeze_26, mul_5, relu_1, unsqueeze_13, mul_7, relu_4}
  )
), TensorBox(StorageBox(
  Convolution(
    name=buf36,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 128, 4, 4]), stride=(2048, 16, 4, 1)),
    inputs=[ComputedBuffer(name='buf26', layout=FixedLayout('cuda', torch.float32, size=(2, 64, 8, 8), stride=[4096, 64, 8, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf22, 8 * ModularIndexing(i3 + 8 * i2, 8, 8) + 64 * ModularIndexing(i1, 1, 2) + 128 * ModularIndexing(i1, 2, 32) + 4096 * i0 + ModularIndexing(i3, 1, 8)) - load(buf25, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) * reciprocal(sqrt(load(buf24, 32 * i0 + ModularIndexing(i1, 2, 32)) / index_expr(128, torch.float32) + constant(1e-05, torch.float32))) * load(primals_14, i1) + load(primals_15, i1) + load(buf16, i3 + 8 * i2 + 64 * i1 + 4096 * i0)),
      ranges=(2, 64, 8, 8),
      origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, primals_12, primals_14, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sub_4, primals_5, view_5, convolution_3, unsqueeze_28, convolution_2, unsqueeze_24, add_6, sqrt_1, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
    )), InputBuffer(name='primals_22', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 1, 1], stride=[64, 1, 1, 1]))],
    constant_args=(None, (2, 2), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, primals_12, primals_14, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, primals_22, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sub_4, primals_5, view_5, convolution_3, unsqueeze_28, convolution_2, unsqueeze_24, add_6, sqrt_1, convolution_7, add_8, primals_6, view_1, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf39, i1 + 32 * i0) / index_expr(64, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, view_14, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean_7, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, primals_12, primals_14, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, primals_22, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sub_4, primals_5, view_5, convolution_3, unsqueeze_28, convolution_2, unsqueeze_24, add_6, sqrt_1, add_8, primals_6, view_1, convolution_7, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, squeeze_28, view_7, primals_11, unsqueeze_3, view_14, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean_7, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, primals_12, primals_14, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, primals_22, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sub_4, primals_5, view_5, convolution_3, unsqueeze_28, convolution_2, unsqueeze_24, add_6, sqrt_1, add_8, primals_6, view_1, convolution_7, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, squeeze_29, convolution_4, unsqueeze, mul_11, unsqueeze_17, squeeze_28, view_7, primals_11, unsqueeze_3, view_14, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean_7, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, primals_12, primals_14, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, primals_22, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sub_4, primals_5, view_5, convolution_3, unsqueeze_28, convolution_2, unsqueeze_24, add_6, sqrt_1, add_8, primals_6, view_1, convolution_7, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, unsqueeze_23, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf38, i1 + 32 * i0) / index_expr(64, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, view_14, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean_7, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, primals_12, primals_14, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, primals_22, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, add_16, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sqrt_7, primals_5, sub_4, view_5, convolution_3, unsqueeze_28, convolution_2, unsqueeze_24, add_6, sqrt_1, add_8, primals_6, view_1, convolution_7, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, unsqueeze_23, reciprocal_7, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, view_14, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean_7, var_mean, primals_2, primals_10, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, squeeze_30, sub_3, primals_12, primals_14, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, primals_22, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, add_16, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sqrt_7, primals_5, sub_4, view_5, convolution_3, unsqueeze_28, convolution_2, unsqueeze_24, add_6, sqrt_1, add_8, primals_6, view_1, convolution_7, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, unsqueeze_23, reciprocal_7, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={reciprocal, max_pool2d_with_indices, add_9, add_11, convolution_1, add_5, primals_7, convolution, mul_2, view_9, add, primals_1, mul_10, relu, convolution_4, unsqueeze, mul_11, unsqueeze_17, view_7, primals_11, unsqueeze_3, view_14, unsqueeze_9, add_7, primals_63, var_mean_3, unsqueeze_16, primals_8, primals_9, var_mean_7, var_mean, primals_2, primals_10, squeeze_31, primals_3, sqrt, reciprocal_3, add_10, unsqueeze_18, view_6, reciprocal_1, sub_3, squeeze_30, primals_12, primals_14, add_4, sqrt_3, unsqueeze_29, mul_14, reciprocal_2, unsqueeze_1, primals_22, relu_2, unsqueeze_21, unsqueeze_4, unsqueeze_22, mul_1, add_16, reciprocal_4, sqrt_2, unsqueeze_2, view_4, primals_4, view_3, sub_2, var_mean_4, sqrt_4, sub, unsqueeze_14, mul_4, mul_8, view, unsqueeze_10, var_mean_1, primals_13, unsqueeze_20, relu_3, add_1, unsqueeze_26, unsqueeze_19, sqrt_7, primals_5, sub_4, view_5, convolution_3, unsqueeze_28, convolution_2, unsqueeze_24, add_6, sqrt_1, add_8, primals_6, view_1, convolution_7, unsqueeze_6, unsqueeze_7, sub_1, mul_13, unsqueeze_27, view_2, unsqueeze_25, unsqueeze_11, unsqueeze_8, view_8, add_3, primals_15, unsqueeze_5, unsqueeze_23, reciprocal_7, unsqueeze_15, unsqueeze_12, var_mean_2, mul_5, relu_1, unsqueeze_13, mul_7, add_2, relu_4}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf41', layout=FixedLayout('cuda', torch.float32, size=(2, 128, 4, 4), stride=[2048, 16, 4, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf40, i3 + 4 * i2 + 16 * i1 + 2048 * i0)),
    ranges=(2, 128, 4, 4),
    origins={relu_6}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf42,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 128, 4, 4]), stride=(2048, 16, 4, 1)),
    inputs=[ComputedBuffer(name='buf41', layout=FixedLayout('cuda', torch.float32, size=(2, 128, 4, 4), stride=[2048, 16, 4, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf40, i3 + 4 * i2 + 16 * i1 + 2048 * i0)),
      ranges=(2, 128, 4, 4),
      origins={relu_6}
    )), InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[128, 128, 3, 3], stride=[1152, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={primals_25, relu_6, convolution_8}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf45, i1 + 32 * i0) / index_expr(64, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={primals_25, view_16, relu_6, convolution_8, var_mean_8}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={squeeze_32, primals_25, view_16, relu_6, convolution_8, var_mean_8}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={squeeze_32, primals_25, view_16, convolution_8, relu_6, squeeze_33, var_mean_8}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf44, i1 + 32 * i0) / index_expr(64, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={reciprocal_8, relu_6, convolution_8, sqrt_8, var_mean_8, primals_25, view_16, add_19}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={reciprocal_8, relu_6, convolution_8, sqrt_8, var_mean_8, primals_25, view_16, squeeze_34, add_19}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={reciprocal_8, relu_6, convolution_8, sqrt_8, var_mean_8, primals_25, view_16, squeeze_34, add_19, squeeze_35}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf46', layout=FixedLayout('cuda', torch.float32, size=(2, 128, 4, 4), stride=[2048, 16, 4, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf42, 4 * ModularIndexing(i3 + 4 * i2, 4, 4) + 16 * ModularIndexing(i1, 1, 4) + 64 * ModularIndexing(i1, 4, 32) + 2048 * i0 + ModularIndexing(i3, 1, 4)) - load(buf45, 32 * i0 + ModularIndexing(i1, 4, 32)) / index_expr(64, torch.float32) * reciprocal(sqrt(load(buf44, 32 * i0 + ModularIndexing(i1, 4, 32)) / index_expr(64, torch.float32) + constant(1e-05, torch.float32))) * load(primals_26, i1) + load(primals_27, i1)),
    ranges=(2, 128, 4, 4),
    origins={unsqueeze_51, unsqueeze_50, add_20, relu_7, sqrt_8, primals_27, unsqueeze_49, add_19, view_17, reciprocal_8, unsqueeze_52, sub_8, primals_26, relu_6, convolution_8, var_mean_8, primals_25, unsqueeze_48, view_16, unsqueeze_53, mul_25, mul_26}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf47,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 128, 4, 4]), stride=(2048, 16, 4, 1)),
    inputs=[ComputedBuffer(name='buf46', layout=FixedLayout('cuda', torch.float32, size=(2, 128, 4, 4), stride=[2048, 16, 4, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf42, 4 * ModularIndexing(i3 + 4 * i2, 4, 4) + 16 * ModularIndexing(i1, 1, 4) + 64 * ModularIndexing(i1, 4, 32) + 2048 * i0 + ModularIndexing(i3, 1, 4)) - load(buf45, 32 * i0 + ModularIndexing(i1, 4, 32)) / index_expr(64, torch.float32) * reciprocal(sqrt(load(buf44, 32 * i0 + ModularIndexing(i1, 4, 32)) / index_expr(64, torch.float32) + constant(1e-05, torch.float32))) * load(primals_26, i1) + load(primals_27, i1)),
      ranges=(2, 128, 4, 4),
      origins={unsqueeze_51, unsqueeze_50, add_20, relu_7, sqrt_8, primals_27, unsqueeze_49, add_19, view_17, reciprocal_8, unsqueeze_52, sub_8, primals_26, relu_6, convolution_8, var_mean_8, primals_25, unsqueeze_48, view_16, unsqueeze_53, mul_25, mul_26}
    )), InputBuffer(name='primals_28', layout=FixedLayout('cuda', torch.float32, size=[128, 128, 3, 3], stride=[1152, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={unsqueeze_51, unsqueeze_50, add_20, relu_7, sqrt_8, primals_27, unsqueeze_49, add_19, view_17, reciprocal_8, unsqueeze_52, convolution_9, sub_8, primals_26, relu_6, convolution_8, var_mean_8, primals_25, unsqueeze_48, view_16, unsqueeze_53, primals_28, mul_25, mul_26}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf50, i1 + 32 * i0) / index_expr(64, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={unsqueeze_51, unsqueeze_50, add_20, relu_7, var_mean_9, sqrt_8, view_18, primals_27, unsqueeze_49, add_19, view_17, reciprocal_8, unsqueeze_52, convolution_9, sub_8, primals_26, relu_6, convolution_8, var_mean_8, primals_25, unsqueeze_48, view_16, unsqueeze_53, primals_28, mul_25, mul_26}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={unsqueeze_51, unsqueeze_50, squeeze_36, add_20, relu_7, var_mean_9, sqrt_8, view_18, primals_27, unsqueeze_49, add_19, view_17, reciprocal_8, unsqueeze_52, convolution_9, sub_8, primals_26, relu_6, convolution_8, var_mean_8, primals_25, unsqueeze_48, view_16, unsqueeze_53, primals_28, mul_25, mul_26}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={unsqueeze_51, unsqueeze_50, add_20, squeeze_36, squeeze_37, relu_7, var_mean_9, sqrt_8, view_18, primals_27, unsqueeze_49, add_19, view_17, reciprocal_8, unsqueeze_52, convolution_9, sub_8, primals_26, relu_6, convolution_8, var_mean_8, primals_25, unsqueeze_48, view_16, unsqueeze_53, primals_28, mul_25, mul_26}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf49, i1 + 32 * i0) / index_expr(64, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={unsqueeze_51, unsqueeze_50, sqrt_9, add_20, relu_7, var_mean_9, sqrt_8, view_18, primals_27, unsqueeze_49, add_19, view_17, reciprocal_8, unsqueeze_52, convolution_9, sub_8, primals_26, reciprocal_9, relu_6, var_mean_8, convolution_8, primals_25, add_21, unsqueeze_48, view_16, unsqueeze_53, primals_28, mul_25, mul_26}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={unsqueeze_51, unsqueeze_50, sqrt_9, add_20, relu_7, var_mean_9, sqrt_8, view_18, primals_27, unsqueeze_49, add_19, view_17, reciprocal_8, unsqueeze_52, convolution_9, sub_8, primals_26, reciprocal_9, relu_6, var_mean_8, convolution_8, primals_25, add_21, unsqueeze_48, view_16, squeeze_38, unsqueeze_53, primals_28, mul_25, mul_26}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={unsqueeze_51, unsqueeze_50, sqrt_9, add_20, relu_7, var_mean_9, sqrt_8, view_18, primals_27, unsqueeze_49, add_19, squeeze_39, view_17, reciprocal_8, unsqueeze_52, convolution_9, sub_8, primals_26, reciprocal_9, relu_6, var_mean_8, convolution_8, primals_25, add_21, unsqueeze_48, view_16, unsqueeze_53, squeeze_38, primals_28, mul_25, mul_26}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf51', layout=FixedLayout('cuda', torch.float32, size=(2, 128, 4, 4), stride=[2048, 16, 4, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf47, 4 * ModularIndexing(i3 + 4 * i2, 4, 4) + 16 * ModularIndexing(i1, 1, 4) + 64 * ModularIndexing(i1, 4, 32) + 2048 * i0 + ModularIndexing(i3, 1, 4)) - load(buf50, 32 * i0 + ModularIndexing(i1, 4, 32)) / index_expr(64, torch.float32) * reciprocal(sqrt(load(buf49, 32 * i0 + ModularIndexing(i1, 4, 32)) / index_expr(64, torch.float32) + constant(1e-05, torch.float32))) * load(primals_29, i1) + load(primals_30, i1) + load(buf41, i3 + 4 * i2 + 16 * i1 + 2048 * i0)),
    ranges=(2, 128, 4, 4),
    origins={primals_29, primals_30, sqrt_8, var_mean_9, view_18, view_19, add_19, view_17, reciprocal_8, primals_26, var_mean_8, primals_25, view_16, unsqueeze_53, unsqueeze_55, unsqueeze_57, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, sub_9, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, primals_28}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf52,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 256, 2, 2]), stride=(1024, 4, 2, 1)),
    inputs=[ComputedBuffer(name='buf51', layout=FixedLayout('cuda', torch.float32, size=(2, 128, 4, 4), stride=[2048, 16, 4, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf47, 4 * ModularIndexing(i3 + 4 * i2, 4, 4) + 16 * ModularIndexing(i1, 1, 4) + 64 * ModularIndexing(i1, 4, 32) + 2048 * i0 + ModularIndexing(i3, 1, 4)) - load(buf50, 32 * i0 + ModularIndexing(i1, 4, 32)) / index_expr(64, torch.float32) * reciprocal(sqrt(load(buf49, 32 * i0 + ModularIndexing(i1, 4, 32)) / index_expr(64, torch.float32) + constant(1e-05, torch.float32))) * load(primals_29, i1) + load(primals_30, i1) + load(buf41, i3 + 4 * i2 + 16 * i1 + 2048 * i0)),
      ranges=(2, 128, 4, 4),
      origins={primals_29, primals_30, sqrt_8, var_mean_9, view_18, view_19, add_19, view_17, reciprocal_8, primals_26, var_mean_8, primals_25, view_16, unsqueeze_53, unsqueeze_55, unsqueeze_57, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, sub_9, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, primals_28}
    )), InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 3, 3], stride=[1152, 9, 3, 1]))],
    constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={primals_29, primals_30, var_mean_9, sqrt_8, view_18, primals_31, view_19, add_19, view_17, reciprocal_8, primals_26, var_mean_8, primals_25, view_16, unsqueeze_53, unsqueeze_55, unsqueeze_57, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, sub_9, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, primals_28}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf55, i1 + 32 * i0) / index_expr(32, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={primals_29, view_20, primals_30, var_mean_9, sqrt_8, view_18, primals_31, view_19, add_19, view_17, reciprocal_8, primals_26, var_mean_8, primals_25, view_16, unsqueeze_53, unsqueeze_55, unsqueeze_57, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, var_mean_10, sub_9, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, primals_28}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={primals_29, view_20, primals_30, sqrt_8, var_mean_9, view_18, primals_31, view_19, add_19, view_17, reciprocal_8, primals_26, var_mean_8, primals_25, view_16, unsqueeze_53, unsqueeze_55, unsqueeze_57, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, squeeze_40, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, var_mean_10, sub_9, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, primals_28}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={primals_29, view_20, primals_30, var_mean_9, sqrt_8, view_18, primals_31, view_19, add_19, view_17, reciprocal_8, squeeze_41, primals_26, var_mean_8, primals_25, view_16, unsqueeze_53, unsqueeze_55, unsqueeze_57, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, squeeze_40, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, var_mean_10, sub_9, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, primals_28}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf54, i1 + 32 * i0) / index_expr(32, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={primals_29, view_20, primals_30, sqrt_8, var_mean_9, view_18, primals_31, view_19, add_19, reciprocal_10, view_17, reciprocal_8, primals_26, var_mean_8, primals_25, view_16, unsqueeze_53, unsqueeze_55, unsqueeze_57, sqrt_10, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, var_mean_10, sub_9, add_24, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, primals_28}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={primals_29, view_20, primals_30, var_mean_9, sqrt_8, view_18, primals_31, view_19, add_19, reciprocal_10, view_17, reciprocal_8, primals_26, var_mean_8, primals_25, view_16, unsqueeze_53, unsqueeze_55, unsqueeze_57, sqrt_10, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, squeeze_42, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, var_mean_10, sub_9, add_24, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, primals_28}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={primals_29, view_20, primals_30, sqrt_8, var_mean_9, view_18, primals_31, view_19, add_19, reciprocal_10, view_17, reciprocal_8, primals_26, var_mean_8, primals_25, view_16, unsqueeze_53, unsqueeze_55, unsqueeze_57, sqrt_10, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, squeeze_42, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, var_mean_10, sub_9, add_24, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, unsqueeze_48, squeeze_43, unsqueeze_56, primals_28}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf56', layout=FixedLayout('cuda', torch.float32, size=(2, 256, 2, 2), stride=[1024, 4, 2, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf52, 2 * ModularIndexing(i3 + 2 * i2, 2, 2) + 4 * ModularIndexing(i1, 1, 8) + 32 * ModularIndexing(i1, 8, 32) + 1024 * i0 + ModularIndexing(i3, 1, 2)) - load(buf55, 32 * i0 + ModularIndexing(i1, 8, 32)) / index_expr(32, torch.float32) * reciprocal(sqrt(load(buf54, 32 * i0 + ModularIndexing(i1, 8, 32)) / index_expr(32, torch.float32) + constant(1e-05, torch.float32))) * load(primals_32, i1) + load(primals_33, i1)),
    ranges=(2, 256, 2, 2),
    origins={view_20, view_18, add_19, view_17, unsqueeze_61, add_25, view_16, unsqueeze_62, unsqueeze_57, sqrt_10, mul_25, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, unsqueeze_64, unsqueeze_59, mul_28, sub_9, sub_10, convolution_9, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, relu_9, unsqueeze_56, primals_29, primals_30, var_mean_9, sqrt_8, unsqueeze_63, primals_31, primals_32, view_19, primals_33, reciprocal_10, reciprocal_8, primals_26, var_mean_8, primals_25, view_21, unsqueeze_53, unsqueeze_55, mul_26, unsqueeze_65, add_20, relu_7, mul_32, mul_29, primals_27, unsqueeze_58, unsqueeze_49, mul_31, add_22, var_mean_10, add_24, unsqueeze_52, unsqueeze_54, sub_8, unsqueeze_60, unsqueeze_48, primals_28}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf57,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 256, 2, 2]), stride=(1024, 4, 2, 1)),
    inputs=[ComputedBuffer(name='buf56', layout=FixedLayout('cuda', torch.float32, size=(2, 256, 2, 2), stride=[1024, 4, 2, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf52, 2 * ModularIndexing(i3 + 2 * i2, 2, 2) + 4 * ModularIndexing(i1, 1, 8) + 32 * ModularIndexing(i1, 8, 32) + 1024 * i0 + ModularIndexing(i3, 1, 2)) - load(buf55, 32 * i0 + ModularIndexing(i1, 8, 32)) / index_expr(32, torch.float32) * reciprocal(sqrt(load(buf54, 32 * i0 + ModularIndexing(i1, 8, 32)) / index_expr(32, torch.float32) + constant(1e-05, torch.float32))) * load(primals_32, i1) + load(primals_33, i1)),
      ranges=(2, 256, 2, 2),
      origins={view_20, view_18, add_19, view_17, unsqueeze_61, add_25, view_16, unsqueeze_62, unsqueeze_57, sqrt_10, mul_25, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, unsqueeze_64, unsqueeze_59, mul_28, sub_9, sub_10, convolution_9, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, relu_9, unsqueeze_56, primals_29, primals_30, var_mean_9, sqrt_8, unsqueeze_63, primals_31, primals_32, view_19, primals_33, reciprocal_10, reciprocal_8, primals_26, var_mean_8, primals_25, view_21, unsqueeze_53, unsqueeze_55, mul_26, unsqueeze_65, add_20, relu_7, mul_32, mul_29, primals_27, unsqueeze_58, unsqueeze_49, mul_31, add_22, var_mean_10, add_24, unsqueeze_52, unsqueeze_54, sub_8, unsqueeze_60, unsqueeze_48, primals_28}
    )), InputBuffer(name='primals_34', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={view_20, primals_34, view_18, add_19, view_17, unsqueeze_61, add_25, view_16, unsqueeze_62, unsqueeze_57, mul_25, sqrt_10, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, unsqueeze_64, unsqueeze_59, mul_28, sub_9, sub_10, convolution_9, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, unsqueeze_56, relu_9, primals_29, primals_30, var_mean_9, sqrt_8, unsqueeze_63, primals_31, primals_32, view_19, primals_33, reciprocal_10, reciprocal_8, primals_26, var_mean_8, primals_25, view_21, unsqueeze_53, unsqueeze_55, mul_26, unsqueeze_65, convolution_11, add_20, relu_7, mul_32, mul_29, primals_27, unsqueeze_58, unsqueeze_49, mul_31, add_22, var_mean_10, add_24, unsqueeze_52, unsqueeze_54, sub_8, unsqueeze_60, unsqueeze_48, primals_28}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf60, i1 + 32 * i0) / index_expr(32, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={view_20, primals_34, view_18, add_19, view_22, view_17, unsqueeze_61, add_25, view_16, unsqueeze_62, unsqueeze_57, mul_25, sqrt_10, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, unsqueeze_64, unsqueeze_59, mul_28, sub_9, sub_10, convolution_9, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, unsqueeze_56, relu_9, primals_29, primals_30, var_mean_9, sqrt_8, unsqueeze_63, primals_31, primals_32, view_19, var_mean_11, primals_33, reciprocal_10, reciprocal_8, primals_26, var_mean_8, primals_25, view_21, unsqueeze_53, unsqueeze_55, mul_26, unsqueeze_65, convolution_11, add_20, relu_7, mul_32, mul_29, primals_27, unsqueeze_58, unsqueeze_49, mul_31, add_22, var_mean_10, add_24, unsqueeze_52, unsqueeze_54, sub_8, unsqueeze_60, unsqueeze_48, primals_28}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={view_20, primals_34, view_18, add_19, view_22, view_17, unsqueeze_61, add_25, view_16, unsqueeze_62, unsqueeze_57, sqrt_10, mul_25, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, unsqueeze_64, unsqueeze_59, mul_28, sub_9, sub_10, convolution_9, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, relu_9, unsqueeze_56, primals_29, primals_30, var_mean_9, sqrt_8, unsqueeze_63, primals_31, primals_32, view_19, squeeze_44, var_mean_11, primals_33, reciprocal_10, reciprocal_8, primals_26, var_mean_8, primals_25, view_21, unsqueeze_53, unsqueeze_55, mul_26, unsqueeze_65, convolution_11, add_20, relu_7, mul_32, mul_29, primals_27, unsqueeze_58, unsqueeze_49, mul_31, add_22, var_mean_10, add_24, unsqueeze_52, unsqueeze_54, sub_8, unsqueeze_60, unsqueeze_48, primals_28}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={view_20, primals_34, view_18, squeeze_45, add_19, view_22, view_17, unsqueeze_61, add_25, view_16, unsqueeze_62, unsqueeze_57, mul_25, sqrt_10, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, unsqueeze_64, unsqueeze_59, mul_28, sub_9, sub_10, convolution_9, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, unsqueeze_56, relu_9, primals_29, primals_30, var_mean_9, sqrt_8, unsqueeze_63, primals_31, primals_32, view_19, squeeze_44, var_mean_11, primals_33, reciprocal_10, reciprocal_8, primals_26, var_mean_8, primals_25, view_21, unsqueeze_53, unsqueeze_55, mul_26, unsqueeze_65, convolution_11, add_20, relu_7, mul_32, mul_29, primals_27, unsqueeze_58, unsqueeze_49, mul_31, add_22, var_mean_10, add_24, unsqueeze_52, unsqueeze_54, sub_8, unsqueeze_60, unsqueeze_48, primals_28}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf59, i1 + 32 * i0) / index_expr(32, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={view_20, primals_34, view_18, add_19, view_22, view_17, unsqueeze_61, add_25, view_16, unsqueeze_62, unsqueeze_57, sqrt_10, mul_25, add_23, unsqueeze_51, reciprocal_11, unsqueeze_50, sqrt_9, relu_8, unsqueeze_64, unsqueeze_59, mul_28, sub_9, sub_10, convolution_9, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, relu_9, unsqueeze_56, add_26, primals_29, primals_30, var_mean_9, sqrt_8, unsqueeze_63, primals_31, primals_32, view_19, var_mean_11, sqrt_11, primals_33, reciprocal_10, reciprocal_8, primals_26, var_mean_8, primals_25, view_21, unsqueeze_53, unsqueeze_55, mul_26, unsqueeze_65, convolution_11, add_20, relu_7, mul_32, mul_29, primals_27, unsqueeze_58, unsqueeze_49, mul_31, add_22, var_mean_10, add_24, unsqueeze_52, unsqueeze_54, sub_8, unsqueeze_60, unsqueeze_48, primals_28}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={view_20, primals_34, view_18, add_19, view_22, view_17, unsqueeze_61, add_25, squeeze_46, view_16, unsqueeze_62, unsqueeze_57, mul_25, sqrt_10, add_23, unsqueeze_51, reciprocal_11, unsqueeze_50, sqrt_9, relu_8, unsqueeze_64, unsqueeze_59, mul_28, sub_9, sub_10, convolution_9, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, unsqueeze_56, relu_9, add_26, primals_29, primals_30, var_mean_9, sqrt_8, unsqueeze_63, primals_31, primals_32, view_19, var_mean_11, sqrt_11, primals_33, reciprocal_10, reciprocal_8, primals_26, var_mean_8, primals_25, view_21, unsqueeze_53, unsqueeze_55, mul_26, unsqueeze_65, convolution_11, add_20, relu_7, mul_32, mul_29, primals_27, unsqueeze_58, unsqueeze_49, mul_31, add_22, var_mean_10, add_24, unsqueeze_52, unsqueeze_54, sub_8, unsqueeze_60, unsqueeze_48, primals_28}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={view_20, primals_34, view_18, add_19, view_22, view_17, unsqueeze_61, add_25, squeeze_46, view_16, unsqueeze_62, unsqueeze_57, sqrt_10, mul_25, add_23, unsqueeze_51, reciprocal_11, unsqueeze_50, sqrt_9, relu_8, unsqueeze_64, unsqueeze_59, mul_28, sub_9, sub_10, convolution_9, relu_6, convolution_8, convolution_10, reciprocal_9, add_21, relu_9, unsqueeze_56, add_26, primals_29, primals_30, var_mean_9, sqrt_8, unsqueeze_63, primals_31, primals_32, view_19, var_mean_11, sqrt_11, primals_33, reciprocal_10, reciprocal_8, primals_26, var_mean_8, primals_25, view_21, unsqueeze_53, unsqueeze_55, mul_26, unsqueeze_65, convolution_11, add_20, relu_7, mul_32, mul_29, primals_27, unsqueeze_58, unsqueeze_49, mul_31, add_22, var_mean_10, squeeze_47, add_24, unsqueeze_52, unsqueeze_54, sub_8, unsqueeze_60, unsqueeze_48, primals_28}
  )
), TensorBox(StorageBox(
  Convolution(
    name=buf61,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 256, 2, 2]), stride=(1024, 4, 2, 1)),
    inputs=[ComputedBuffer(name='buf51', layout=FixedLayout('cuda', torch.float32, size=(2, 128, 4, 4), stride=[2048, 16, 4, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf47, 4 * ModularIndexing(i3 + 4 * i2, 4, 4) + 16 * ModularIndexing(i1, 1, 4) + 64 * ModularIndexing(i1, 4, 32) + 2048 * i0 + ModularIndexing(i3, 1, 4)) - load(buf50, 32 * i0 + ModularIndexing(i1, 4, 32)) / index_expr(64, torch.float32) * reciprocal(sqrt(load(buf49, 32 * i0 + ModularIndexing(i1, 4, 32)) / index_expr(64, torch.float32) + constant(1e-05, torch.float32))) * load(primals_29, i1) + load(primals_30, i1) + load(buf41, i3 + 4 * i2 + 16 * i1 + 2048 * i0)),
      ranges=(2, 128, 4, 4),
      origins={primals_29, primals_30, sqrt_8, var_mean_9, view_18, view_19, add_19, view_17, reciprocal_8, primals_26, var_mean_8, primals_25, view_16, unsqueeze_53, unsqueeze_55, unsqueeze_57, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, sub_9, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, primals_28}
    )), InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 1, 1], stride=[128, 1, 1, 1]))],
    constant_args=(None, (2, 2), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={primals_29, primals_37, primals_30, var_mean_9, sqrt_8, view_18, view_19, add_19, view_17, reciprocal_8, primals_26, var_mean_8, primals_25, convolution_12, view_16, unsqueeze_53, unsqueeze_55, unsqueeze_57, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, sub_9, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, primals_28}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf64, i1 + 32 * i0) / index_expr(32, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={primals_29, primals_37, primals_30, var_mean_9, sqrt_8, view_18, view_19, add_19, view_17, reciprocal_8, primals_26, var_mean_8, var_mean_12, primals_25, convolution_12, view_16, unsqueeze_53, unsqueeze_55, unsqueeze_57, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, view_24, sub_9, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, primals_28}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={primals_29, primals_37, primals_30, squeeze_48, sqrt_8, var_mean_9, view_18, view_19, add_19, view_17, reciprocal_8, primals_26, var_mean_8, var_mean_12, primals_25, convolution_12, view_16, unsqueeze_53, unsqueeze_55, unsqueeze_57, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, view_24, sub_9, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, primals_28}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={primals_29, primals_37, primals_30, squeeze_48, sqrt_8, var_mean_9, view_18, view_19, add_19, view_17, reciprocal_8, primals_26, var_mean_8, var_mean_12, primals_25, squeeze_49, convolution_12, view_16, unsqueeze_53, unsqueeze_55, unsqueeze_57, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, view_24, sub_9, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, primals_28}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf63, i1 + 32 * i0) / index_expr(32, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={primals_29, primals_37, primals_30, sqrt_8, var_mean_9, add_28, view_18, view_19, add_19, sqrt_12, view_17, reciprocal_8, primals_26, var_mean_8, var_mean_12, primals_25, convolution_12, view_16, unsqueeze_53, unsqueeze_55, primals_28, unsqueeze_57, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, view_24, sub_9, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, reciprocal_12}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={primals_29, primals_37, primals_30, var_mean_9, sqrt_8, add_28, view_18, view_19, add_19, sqrt_12, view_17, reciprocal_8, squeeze_50, primals_26, var_mean_8, var_mean_12, primals_25, convolution_12, view_16, unsqueeze_53, unsqueeze_55, primals_28, unsqueeze_57, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, view_24, sub_9, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, reciprocal_12}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={primals_29, primals_37, primals_30, sqrt_8, var_mean_9, add_28, view_18, squeeze_51, view_19, add_19, sqrt_12, view_17, reciprocal_8, squeeze_50, primals_26, var_mean_8, var_mean_12, primals_25, convolution_12, view_16, unsqueeze_53, unsqueeze_55, primals_28, unsqueeze_57, mul_25, mul_26, add_23, unsqueeze_51, unsqueeze_50, sqrt_9, relu_8, add_20, relu_7, mul_29, unsqueeze_59, primals_27, mul_28, unsqueeze_58, unsqueeze_49, add_22, view_24, sub_9, unsqueeze_52, convolution_9, sub_8, unsqueeze_54, relu_6, convolution_8, reciprocal_9, add_21, unsqueeze_48, unsqueeze_56, reciprocal_12}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf66', layout=FixedLayout('cuda', torch.float32, size=(2, 256, 2, 2), stride=[1024, 4, 2, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf65, i3 + 2 * i2 + 4 * i1 + 1024 * i0)),
    ranges=(2, 256, 2, 2),
    origins={relu_10}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf67,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 256, 2, 2]), stride=(1024, 4, 2, 1)),
    inputs=[ComputedBuffer(name='buf66', layout=FixedLayout('cuda', torch.float32, size=(2, 256, 2, 2), stride=[1024, 4, 2, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf65, i3 + 2 * i2 + 4 * i1 + 1024 * i0)),
      ranges=(2, 256, 2, 2),
      origins={relu_10}
    )), InputBuffer(name='primals_40', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={convolution_13, relu_10, primals_40}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf70, i1 + 32 * i0) / index_expr(32, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={view_26, convolution_13, primals_40, var_mean_13, relu_10}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={view_26, convolution_13, primals_40, squeeze_52, var_mean_13, relu_10}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={view_26, convolution_13, primals_40, squeeze_52, var_mean_13, relu_10, squeeze_53}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf69, i1 + 32 * i0) / index_expr(32, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={var_mean_13, add_31, reciprocal_13, sqrt_13, view_26, convolution_13, primals_40, relu_10}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={var_mean_13, add_31, reciprocal_13, squeeze_54, sqrt_13, view_26, convolution_13, primals_40, relu_10}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={var_mean_13, squeeze_55, add_31, reciprocal_13, squeeze_54, sqrt_13, view_26, convolution_13, primals_40, relu_10}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf71', layout=FixedLayout('cuda', torch.float32, size=(2, 256, 2, 2), stride=[1024, 4, 2, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf67, 2 * ModularIndexing(i3 + 2 * i2, 2, 2) + 4 * ModularIndexing(i1, 1, 8) + 32 * ModularIndexing(i1, 8, 32) + 1024 * i0 + ModularIndexing(i3, 1, 2)) - load(buf70, 32 * i0 + ModularIndexing(i1, 8, 32)) / index_expr(32, torch.float32) * reciprocal(sqrt(load(buf69, 32 * i0 + ModularIndexing(i1, 8, 32)) / index_expr(32, torch.float32) + constant(1e-05, torch.float32))) * load(primals_41, i1) + load(primals_42, i1)),
    ranges=(2, 256, 2, 2),
    origins={unsqueeze_83, mul_41, var_mean_13, sqrt_13, convolution_13, relu_11, primals_41, unsqueeze_81, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, primals_42, view_26, view_27, add_32, primals_40, unsqueeze_80, unsqueeze_79, sub_13, relu_10}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf72,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 256, 2, 2]), stride=(1024, 4, 2, 1)),
    inputs=[ComputedBuffer(name='buf71', layout=FixedLayout('cuda', torch.float32, size=(2, 256, 2, 2), stride=[1024, 4, 2, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf67, 2 * ModularIndexing(i3 + 2 * i2, 2, 2) + 4 * ModularIndexing(i1, 1, 8) + 32 * ModularIndexing(i1, 8, 32) + 1024 * i0 + ModularIndexing(i3, 1, 2)) - load(buf70, 32 * i0 + ModularIndexing(i1, 8, 32)) / index_expr(32, torch.float32) * reciprocal(sqrt(load(buf69, 32 * i0 + ModularIndexing(i1, 8, 32)) / index_expr(32, torch.float32) + constant(1e-05, torch.float32))) * load(primals_41, i1) + load(primals_42, i1)),
      ranges=(2, 256, 2, 2),
      origins={unsqueeze_83, mul_41, var_mean_13, sqrt_13, convolution_13, relu_11, primals_41, unsqueeze_81, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, primals_42, view_26, view_27, add_32, primals_40, unsqueeze_80, unsqueeze_79, sub_13, relu_10}
    )), InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={unsqueeze_83, mul_41, var_mean_13, sqrt_13, convolution_13, relu_11, convolution_14, primals_41, unsqueeze_81, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, primals_42, view_26, view_27, add_32, primals_40, primals_43, unsqueeze_80, unsqueeze_79, sub_13, relu_10}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf75, i1 + 32 * i0) / index_expr(32, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={unsqueeze_83, mul_41, var_mean_13, sqrt_13, view_28, convolution_13, relu_11, convolution_14, primals_41, unsqueeze_81, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, primals_42, view_26, view_27, var_mean_14, add_32, primals_40, primals_43, unsqueeze_80, unsqueeze_79, sub_13, relu_10}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={unsqueeze_83, mul_41, var_mean_13, sqrt_13, view_28, convolution_13, relu_11, convolution_14, primals_41, unsqueeze_81, unsqueeze_80, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, primals_42, view_26, view_27, add_32, squeeze_56, var_mean_14, primals_43, primals_40, unsqueeze_79, sub_13, relu_10}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={unsqueeze_83, mul_41, var_mean_13, sqrt_13, view_28, convolution_13, relu_11, convolution_14, primals_41, unsqueeze_81, unsqueeze_80, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, primals_42, view_26, view_27, add_32, var_mean_14, squeeze_56, primals_43, primals_40, unsqueeze_79, sub_13, relu_10, squeeze_57}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf74, i1 + 32 * i0) / index_expr(32, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={unsqueeze_83, mul_41, reciprocal_14, var_mean_13, sqrt_13, view_28, convolution_13, relu_11, convolution_14, primals_41, unsqueeze_81, sqrt_14, mul_40, unsqueeze_78, unsqueeze_82, add_31, add_33, primals_42, view_26, reciprocal_13, view_27, add_32, var_mean_14, primals_40, primals_43, unsqueeze_80, unsqueeze_79, sub_13, relu_10}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={unsqueeze_83, mul_41, reciprocal_14, var_mean_13, sqrt_13, view_28, convolution_13, relu_11, convolution_14, primals_41, unsqueeze_81, sqrt_14, mul_40, unsqueeze_78, unsqueeze_82, add_31, add_33, primals_42, view_26, reciprocal_13, view_27, add_32, var_mean_14, primals_40, primals_43, unsqueeze_80, unsqueeze_79, sub_13, relu_10, squeeze_58}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={unsqueeze_83, mul_41, reciprocal_14, var_mean_13, sqrt_13, view_28, convolution_13, relu_11, convolution_14, primals_41, unsqueeze_81, sqrt_14, mul_40, unsqueeze_78, squeeze_59, unsqueeze_82, add_31, add_33, primals_42, view_26, reciprocal_13, view_27, add_32, var_mean_14, primals_40, primals_43, unsqueeze_80, unsqueeze_79, sub_13, relu_10, squeeze_58}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf76', layout=FixedLayout('cuda', torch.float32, size=(2, 256, 2, 2), stride=[1024, 4, 2, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf72, 2 * ModularIndexing(i3 + 2 * i2, 2, 2) + 4 * ModularIndexing(i1, 1, 8) + 32 * ModularIndexing(i1, 8, 32) + 1024 * i0 + ModularIndexing(i3, 1, 2)) - load(buf75, 32 * i0 + ModularIndexing(i1, 8, 32)) / index_expr(32, torch.float32) * reciprocal(sqrt(load(buf74, 32 * i0 + ModularIndexing(i1, 8, 32)) / index_expr(32, torch.float32) + constant(1e-05, torch.float32))) * load(primals_44, i1) + load(primals_45, i1) + load(buf66, i3 + 2 * i2 + 4 * i1 + 1024 * i0)),
    ranges=(2, 256, 2, 2),
    origins={unsqueeze_89, unsqueeze_83, mul_41, reciprocal_14, var_mean_13, unsqueeze_84, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, sqrt_14, add_33, relu_12, primals_42, view_26, unsqueeze_88, view_27, unsqueeze_86, primals_43, sub_13, relu_10, view_29, primals_44, mul_43, unsqueeze_85, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, add_32, var_mean_14, primals_40, unsqueeze_80, mul_44, unsqueeze_79, sub_14}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf79,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 512, 1, 1]), stride=(512, 1, 1, 1)),
    inputs=[ComputedBuffer(name='buf78', layout=FixedLayout('cuda', torch.float32, size=(2, 256, 2, 2), stride=[1024, 1, 256, 512]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf76, i3 + 2 * i2 + 4 * i1 + 1024 * i0),
      ranges=(2, 256, 2, 2),
      origins={unsqueeze_83, unsqueeze_89, mul_41, reciprocal_14, var_mean_13, unsqueeze_84, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, sqrt_14, add_33, relu_12, primals_42, view_26, unsqueeze_88, view_27, unsqueeze_86, primals_43, convolution_15, sub_13, relu_10, view_29, primals_44, mul_43, unsqueeze_85, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, add_32, var_mean_14, primals_46, primals_40, unsqueeze_80, mul_44, unsqueeze_79, sub_14}
    )), ComputedBuffer(name='buf77', layout=FixedLayout('cuda', torch.float32, size=[512, 256, 3, 3], stride=[2304, 1, 256, 768]), data=Pointwise(
      'cuda',
      torch.float32,
      load(primals_46, i3 + 3 * i2 + 9 * i1 + 2304 * i0),
      ranges=[512, 256, 3, 3],
      origins={unsqueeze_83, unsqueeze_89, mul_41, reciprocal_14, var_mean_13, unsqueeze_84, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, sqrt_14, add_33, relu_12, primals_42, view_26, unsqueeze_88, view_27, unsqueeze_86, primals_43, convolution_15, sub_13, relu_10, view_29, primals_44, mul_43, unsqueeze_85, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, add_32, var_mean_14, primals_46, primals_40, unsqueeze_80, mul_44, unsqueeze_79, sub_14}
    ))],
    constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={unsqueeze_83, unsqueeze_89, mul_41, reciprocal_14, var_mean_13, unsqueeze_84, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, sqrt_14, add_33, relu_12, primals_42, view_26, unsqueeze_88, view_27, unsqueeze_86, primals_43, convolution_15, sub_13, relu_10, view_29, primals_44, mul_43, unsqueeze_85, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, add_32, var_mean_14, primals_46, primals_40, unsqueeze_80, mul_44, unsqueeze_79, sub_14}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf82, i1 + 32 * i0) / index_expr(16, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={unsqueeze_83, unsqueeze_89, mul_41, reciprocal_14, var_mean_13, unsqueeze_84, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, sqrt_14, add_33, relu_12, primals_42, view_30, view_26, unsqueeze_88, view_27, unsqueeze_86, primals_43, convolution_15, sub_13, relu_10, view_29, primals_44, mul_43, unsqueeze_85, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, mul_44, reciprocal_13, add_32, var_mean_14, primals_46, primals_40, unsqueeze_80, var_mean_15, unsqueeze_79, sub_14}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={unsqueeze_89, unsqueeze_83, mul_41, reciprocal_14, var_mean_13, unsqueeze_84, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, squeeze_60, relu_11, convolution_14, primals_41, sqrt_14, add_33, relu_12, primals_42, view_30, view_26, unsqueeze_88, view_27, unsqueeze_86, primals_43, convolution_15, sub_13, relu_10, view_29, primals_44, mul_43, unsqueeze_85, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, mul_44, reciprocal_13, add_32, var_mean_14, primals_46, primals_40, unsqueeze_80, var_mean_15, unsqueeze_79, sub_14}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={unsqueeze_83, unsqueeze_89, mul_41, reciprocal_14, var_mean_13, unsqueeze_84, squeeze_61, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, squeeze_60, relu_11, convolution_14, primals_41, sqrt_14, add_33, relu_12, primals_42, view_30, view_26, unsqueeze_88, view_27, unsqueeze_86, primals_43, convolution_15, sub_13, relu_10, view_29, primals_44, mul_43, unsqueeze_85, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, mul_44, reciprocal_13, add_32, var_mean_14, primals_46, primals_40, unsqueeze_80, var_mean_15, unsqueeze_79, sub_14}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf81, i1 + 32 * i0) / index_expr(16, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={unsqueeze_89, unsqueeze_83, mul_41, reciprocal_14, var_mean_13, unsqueeze_84, reciprocal_15, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, sqrt_14, add_33, relu_12, primals_42, view_30, view_26, unsqueeze_88, view_27, unsqueeze_86, primals_43, convolution_15, sub_13, relu_10, add_36, view_29, primals_44, mul_43, unsqueeze_85, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, mul_44, reciprocal_13, sqrt_15, add_32, var_mean_14, primals_46, primals_40, unsqueeze_80, var_mean_15, unsqueeze_79, sub_14}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={unsqueeze_83, unsqueeze_89, mul_41, reciprocal_14, squeeze_62, var_mean_13, unsqueeze_84, reciprocal_15, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, sqrt_14, add_33, relu_12, primals_42, view_30, view_26, unsqueeze_88, view_27, unsqueeze_86, primals_43, convolution_15, sub_13, relu_10, add_36, view_29, primals_44, mul_43, unsqueeze_85, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, mul_44, reciprocal_13, sqrt_15, add_32, var_mean_14, primals_46, primals_40, unsqueeze_80, var_mean_15, unsqueeze_79, sub_14}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={unsqueeze_89, unsqueeze_83, mul_41, squeeze_63, reciprocal_14, squeeze_62, var_mean_13, unsqueeze_84, reciprocal_15, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, sqrt_14, add_33, relu_12, primals_42, view_30, view_26, unsqueeze_88, view_27, unsqueeze_86, primals_43, convolution_15, sub_13, relu_10, add_36, view_29, primals_44, mul_43, unsqueeze_85, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, mul_44, reciprocal_13, sqrt_15, add_32, var_mean_14, primals_46, primals_40, unsqueeze_80, var_mean_15, unsqueeze_79, sub_14}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf83', layout=FixedLayout('cuda', torch.float32, size=(2, 512, 1, 1), stride=[512, 1, 512, 512]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf79, 16 * ModularIndexing(i1, 16, 32) + 512 * i0 + ModularIndexing(i1, 1, 16)) - load(buf82, 32 * i0 + ModularIndexing(i1, 16, 32)) / index_expr(16, torch.float32) * reciprocal(sqrt(load(buf81, 32 * i0 + ModularIndexing(i1, 16, 32)) / index_expr(16, torch.float32) + constant(1e-05, torch.float32))) * load(primals_47, i1) + load(primals_48, i1)),
    ranges=(2, 512, 1, 1),
    origins={unsqueeze_94, primals_47, mul_47, unsqueeze_89, relu_13, unsqueeze_83, mul_41, reciprocal_14, var_mean_13, unsqueeze_93, unsqueeze_84, primals_45, add_37, sqrt_13, convolution_13, unsqueeze_91, view_31, sqrt_14, unsqueeze_88, view_26, view_30, convolution_15, relu_10, unsqueeze_92, add_36, view_29, primals_44, sub_15, unsqueeze_90, add_35, unsqueeze_82, add_31, add_32, var_mean_14, unsqueeze_80, var_mean_15, unsqueeze_95, reciprocal_15, unsqueeze_87, view_28, relu_11, convolution_14, primals_41, add_33, relu_12, primals_42, view_27, unsqueeze_86, primals_43, sub_13, primals_48, mul_43, unsqueeze_85, unsqueeze_81, add_34, mul_40, unsqueeze_78, mul_46, reciprocal_13, sqrt_15, primals_46, primals_40, mul_44, unsqueeze_79, sub_14}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf85,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 512, 1, 1]), stride=(512, 1, 1, 1)),
    inputs=[ComputedBuffer(name='buf83', layout=FixedLayout('cuda', torch.float32, size=(2, 512, 1, 1), stride=[512, 1, 512, 512]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf79, 16 * ModularIndexing(i1, 16, 32) + 512 * i0 + ModularIndexing(i1, 1, 16)) - load(buf82, 32 * i0 + ModularIndexing(i1, 16, 32)) / index_expr(16, torch.float32) * reciprocal(sqrt(load(buf81, 32 * i0 + ModularIndexing(i1, 16, 32)) / index_expr(16, torch.float32) + constant(1e-05, torch.float32))) * load(primals_47, i1) + load(primals_48, i1)),
      ranges=(2, 512, 1, 1),
      origins={unsqueeze_94, primals_47, mul_47, unsqueeze_89, relu_13, unsqueeze_83, mul_41, reciprocal_14, var_mean_13, unsqueeze_93, unsqueeze_84, primals_45, add_37, sqrt_13, convolution_13, unsqueeze_91, view_31, sqrt_14, unsqueeze_88, view_26, view_30, convolution_15, relu_10, unsqueeze_92, add_36, view_29, primals_44, sub_15, unsqueeze_90, add_35, unsqueeze_82, add_31, add_32, var_mean_14, unsqueeze_80, var_mean_15, unsqueeze_95, reciprocal_15, unsqueeze_87, view_28, relu_11, convolution_14, primals_41, add_33, relu_12, primals_42, view_27, unsqueeze_86, primals_43, sub_13, primals_48, mul_43, unsqueeze_85, unsqueeze_81, add_34, mul_40, unsqueeze_78, mul_46, reciprocal_13, sqrt_15, primals_46, primals_40, mul_44, unsqueeze_79, sub_14}
    )), ComputedBuffer(name='buf84', layout=FixedLayout('cuda', torch.float32, size=[512, 512, 3, 3], stride=[512, 1, 262144, 786432]), data=Pointwise(
      'cuda',
      torch.float32,
      load(primals_49, i3 + 3 * i2 + 9 * i1 + 4608 * i0),
      ranges=[512, 512, 3, 3],
      origins={unsqueeze_94, primals_47, mul_47, unsqueeze_89, relu_13, unsqueeze_83, mul_41, primals_49, reciprocal_14, var_mean_13, unsqueeze_93, unsqueeze_84, primals_45, add_37, sqrt_13, convolution_13, unsqueeze_91, view_31, sqrt_14, unsqueeze_88, view_26, view_30, convolution_15, relu_10, unsqueeze_92, add_36, convolution_16, view_29, primals_44, sub_15, unsqueeze_90, add_35, unsqueeze_82, add_31, add_32, var_mean_14, unsqueeze_80, var_mean_15, unsqueeze_95, reciprocal_15, unsqueeze_87, view_28, relu_11, convolution_14, primals_41, add_33, relu_12, primals_42, view_27, unsqueeze_86, primals_43, sub_13, primals_48, mul_43, unsqueeze_85, unsqueeze_81, add_34, mul_40, unsqueeze_78, mul_46, reciprocal_13, sqrt_15, primals_46, primals_40, mul_44, unsqueeze_79, sub_14}
    ))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={unsqueeze_94, primals_47, mul_47, unsqueeze_89, relu_13, unsqueeze_83, mul_41, primals_49, reciprocal_14, var_mean_13, unsqueeze_93, unsqueeze_84, primals_45, add_37, sqrt_13, convolution_13, unsqueeze_91, view_31, sqrt_14, unsqueeze_88, view_26, view_30, convolution_15, relu_10, unsqueeze_92, add_36, convolution_16, view_29, primals_44, sub_15, unsqueeze_90, add_35, unsqueeze_82, add_31, add_32, var_mean_14, unsqueeze_80, var_mean_15, unsqueeze_95, reciprocal_15, unsqueeze_87, view_28, relu_11, convolution_14, primals_41, add_33, relu_12, primals_42, view_27, unsqueeze_86, primals_43, sub_13, primals_48, mul_43, unsqueeze_85, unsqueeze_81, add_34, mul_40, unsqueeze_78, mul_46, reciprocal_13, sqrt_15, primals_46, primals_40, mul_44, unsqueeze_79, sub_14}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf88, i1 + 32 * i0) / index_expr(16, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={unsqueeze_94, primals_47, mul_47, unsqueeze_89, relu_13, unsqueeze_83, mul_41, primals_49, reciprocal_14, var_mean_13, unsqueeze_93, unsqueeze_84, primals_45, add_37, sqrt_13, convolution_13, unsqueeze_91, view_31, view_32, sqrt_14, unsqueeze_88, view_26, view_30, convolution_15, relu_10, unsqueeze_92, add_36, convolution_16, view_29, primals_44, sub_15, unsqueeze_90, add_35, unsqueeze_82, add_31, add_32, var_mean_14, unsqueeze_80, var_mean_15, unsqueeze_95, reciprocal_15, unsqueeze_87, view_28, relu_11, convolution_14, primals_41, add_33, relu_12, primals_42, var_mean_16, view_27, unsqueeze_86, primals_43, sub_13, primals_48, mul_43, unsqueeze_85, unsqueeze_81, add_34, mul_40, unsqueeze_78, mul_46, reciprocal_13, sqrt_15, primals_46, primals_40, mul_44, unsqueeze_79, sub_14}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={unsqueeze_94, primals_47, mul_47, unsqueeze_89, relu_13, unsqueeze_83, mul_41, primals_49, reciprocal_14, var_mean_13, unsqueeze_93, unsqueeze_84, primals_45, add_37, sqrt_13, convolution_13, unsqueeze_91, view_31, view_32, sqrt_14, unsqueeze_88, view_26, view_30, convolution_15, relu_10, unsqueeze_92, add_36, convolution_16, view_29, primals_44, sub_15, unsqueeze_90, add_35, unsqueeze_82, add_31, add_32, var_mean_14, unsqueeze_80, var_mean_15, unsqueeze_95, reciprocal_15, unsqueeze_87, view_28, relu_11, convolution_14, primals_41, add_33, relu_12, primals_42, var_mean_16, view_27, unsqueeze_86, primals_43, sub_13, primals_48, squeeze_64, mul_43, unsqueeze_85, unsqueeze_81, add_34, mul_40, unsqueeze_78, mul_46, reciprocal_13, sqrt_15, primals_46, primals_40, mul_44, unsqueeze_79, sub_14}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={unsqueeze_94, primals_47, mul_47, unsqueeze_89, relu_13, unsqueeze_83, mul_41, primals_49, reciprocal_14, var_mean_13, unsqueeze_93, unsqueeze_84, primals_45, add_37, sqrt_13, convolution_13, unsqueeze_91, view_31, view_32, sqrt_14, unsqueeze_88, view_26, view_30, squeeze_65, convolution_15, relu_10, unsqueeze_92, add_36, convolution_16, view_29, primals_44, sub_15, unsqueeze_90, add_35, unsqueeze_82, add_31, add_32, var_mean_14, unsqueeze_80, var_mean_15, unsqueeze_95, reciprocal_15, unsqueeze_87, view_28, relu_11, convolution_14, primals_41, add_33, relu_12, primals_42, var_mean_16, view_27, unsqueeze_86, primals_43, sub_13, primals_48, squeeze_64, mul_43, unsqueeze_85, unsqueeze_81, add_34, mul_40, unsqueeze_78, mul_46, reciprocal_13, sqrt_15, primals_46, primals_40, mul_44, unsqueeze_79, sub_14}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf87, i1 + 32 * i0) / index_expr(16, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={unsqueeze_94, primals_47, mul_47, unsqueeze_89, relu_13, unsqueeze_83, mul_41, primals_49, reciprocal_14, var_mean_13, unsqueeze_93, unsqueeze_84, primals_45, add_37, sqrt_13, convolution_13, sqrt_16, unsqueeze_91, view_31, view_32, sqrt_14, unsqueeze_88, view_26, view_30, add_38, convolution_15, relu_10, unsqueeze_92, add_36, convolution_16, view_29, primals_44, sub_15, unsqueeze_90, add_35, unsqueeze_82, add_31, reciprocal_16, add_32, var_mean_14, unsqueeze_80, var_mean_15, unsqueeze_95, reciprocal_15, unsqueeze_87, view_28, relu_11, convolution_14, primals_41, add_33, relu_12, primals_42, var_mean_16, view_27, unsqueeze_86, primals_43, sub_13, primals_48, mul_43, unsqueeze_85, unsqueeze_81, add_34, mul_40, unsqueeze_78, mul_46, reciprocal_13, sqrt_15, primals_46, primals_40, mul_44, unsqueeze_79, sub_14}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={unsqueeze_94, primals_47, mul_47, unsqueeze_89, relu_13, unsqueeze_83, mul_41, primals_49, reciprocal_14, var_mean_13, unsqueeze_93, unsqueeze_84, primals_45, add_37, sqrt_13, convolution_13, sqrt_16, unsqueeze_91, view_31, view_32, sqrt_14, unsqueeze_88, view_26, view_30, add_38, convolution_15, relu_10, squeeze_66, unsqueeze_92, add_36, convolution_16, view_29, primals_44, sub_15, unsqueeze_90, add_35, unsqueeze_82, add_31, reciprocal_16, add_32, var_mean_14, unsqueeze_80, var_mean_15, unsqueeze_95, reciprocal_15, unsqueeze_87, view_28, relu_11, convolution_14, primals_41, add_33, relu_12, primals_42, var_mean_16, view_27, unsqueeze_86, primals_43, sub_13, primals_48, mul_43, unsqueeze_85, unsqueeze_81, add_34, mul_40, unsqueeze_78, mul_46, reciprocal_13, sqrt_15, primals_46, primals_40, mul_44, unsqueeze_79, sub_14}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={unsqueeze_94, primals_47, mul_47, unsqueeze_89, relu_13, unsqueeze_83, mul_41, primals_49, reciprocal_14, var_mean_13, unsqueeze_93, unsqueeze_84, primals_45, add_37, sqrt_13, convolution_13, sqrt_16, unsqueeze_91, view_31, view_32, sqrt_14, unsqueeze_88, view_26, view_30, add_38, convolution_15, relu_10, squeeze_66, unsqueeze_92, add_36, convolution_16, view_29, primals_44, sub_15, squeeze_67, unsqueeze_90, add_35, unsqueeze_82, add_31, reciprocal_16, add_32, var_mean_14, unsqueeze_80, var_mean_15, unsqueeze_95, reciprocal_15, unsqueeze_87, view_28, relu_11, convolution_14, primals_41, add_33, relu_12, primals_42, var_mean_16, view_27, unsqueeze_86, primals_43, sub_13, primals_48, mul_43, unsqueeze_85, unsqueeze_81, add_34, mul_40, unsqueeze_78, mul_46, reciprocal_13, sqrt_15, primals_46, primals_40, mul_44, unsqueeze_79, sub_14}
  )
), TensorBox(StorageBox(
  Convolution(
    name=buf90,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 512, 1, 1]), stride=(512, 1, 1, 1)),
    inputs=[ComputedBuffer(name='buf89', layout=FixedLayout('cuda', torch.float32, size=(2, 256, 2, 2), stride=[1024, 1, 256, 512]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf76, i3 + 2 * i2 + 4 * i1 + 1024 * i0),
      ranges=(2, 256, 2, 2),
      origins={unsqueeze_83, unsqueeze_89, mul_41, reciprocal_14, var_mean_13, primals_52, unsqueeze_84, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, sqrt_14, add_33, relu_12, primals_42, view_26, unsqueeze_88, view_27, unsqueeze_86, primals_43, sub_13, relu_10, view_29, primals_44, mul_43, convolution_17, unsqueeze_85, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, add_32, var_mean_14, primals_40, unsqueeze_80, mul_44, unsqueeze_79, sub_14}
    )), InputBuffer(name='primals_52', layout=FixedLayout('cuda', torch.float32, size=[512, 256, 1, 1], stride=[256, 1, 1, 1]))],
    constant_args=(None, (2, 2), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={unsqueeze_83, unsqueeze_89, mul_41, reciprocal_14, var_mean_13, primals_52, unsqueeze_84, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, sqrt_14, add_33, relu_12, primals_42, view_26, unsqueeze_88, view_27, unsqueeze_86, primals_43, sub_13, relu_10, view_29, primals_44, mul_43, convolution_17, unsqueeze_85, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, add_32, var_mean_14, primals_40, unsqueeze_80, mul_44, unsqueeze_79, sub_14}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf93, i1 + 32 * i0) / index_expr(16, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={unsqueeze_83, unsqueeze_89, mul_41, reciprocal_14, var_mean_13, primals_52, unsqueeze_84, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, var_mean_17, sqrt_14, add_33, relu_12, primals_42, view_26, unsqueeze_88, view_34, view_27, unsqueeze_86, primals_43, sub_13, relu_10, view_29, primals_44, mul_43, convolution_17, unsqueeze_85, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, add_32, var_mean_14, primals_40, unsqueeze_80, mul_44, unsqueeze_79, sub_14}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={unsqueeze_89, unsqueeze_83, mul_41, reciprocal_14, var_mean_13, primals_52, unsqueeze_84, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, var_mean_17, sqrt_14, add_33, relu_12, primals_42, view_26, unsqueeze_88, view_34, view_27, unsqueeze_86, primals_43, sub_13, relu_10, view_29, primals_44, mul_43, convolution_17, unsqueeze_85, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, squeeze_68, reciprocal_13, add_32, var_mean_14, primals_40, unsqueeze_80, mul_44, unsqueeze_79, sub_14}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={unsqueeze_83, unsqueeze_89, mul_41, reciprocal_14, var_mean_13, primals_52, unsqueeze_84, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, var_mean_17, sqrt_14, add_33, relu_12, primals_42, view_26, unsqueeze_88, view_34, view_27, unsqueeze_86, primals_43, sub_13, relu_10, view_29, primals_44, mul_43, convolution_17, unsqueeze_85, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, squeeze_69, unsqueeze_82, add_31, squeeze_68, reciprocal_13, add_32, var_mean_14, primals_40, unsqueeze_80, mul_44, unsqueeze_79, sub_14}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf92, i1 + 32 * i0) / index_expr(16, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={unsqueeze_89, unsqueeze_83, mul_41, reciprocal_14, var_mean_13, primals_52, unsqueeze_84, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, var_mean_17, sqrt_14, add_33, relu_12, primals_42, view_26, unsqueeze_88, view_34, view_27, unsqueeze_86, primals_43, sub_13, relu_10, view_29, add_40, primals_44, mul_43, convolution_17, unsqueeze_85, reciprocal_17, sqrt_17, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, add_32, var_mean_14, primals_40, unsqueeze_80, mul_44, unsqueeze_79, sub_14}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={unsqueeze_83, unsqueeze_89, mul_41, squeeze_70, reciprocal_14, var_mean_13, primals_52, unsqueeze_84, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, var_mean_17, sqrt_14, add_33, relu_12, primals_42, view_26, unsqueeze_88, view_34, view_27, unsqueeze_86, primals_43, sub_13, relu_10, view_29, add_40, primals_44, mul_43, convolution_17, unsqueeze_85, reciprocal_17, sqrt_17, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, add_32, var_mean_14, primals_40, unsqueeze_80, mul_44, unsqueeze_79, sub_14}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={unsqueeze_89, unsqueeze_83, mul_41, squeeze_70, reciprocal_14, var_mean_13, primals_52, unsqueeze_84, primals_45, sqrt_13, unsqueeze_87, view_28, convolution_13, relu_11, convolution_14, primals_41, var_mean_17, sqrt_14, add_33, relu_12, primals_42, view_26, unsqueeze_88, view_34, view_27, unsqueeze_86, primals_40, primals_43, sub_13, relu_10, view_29, add_40, primals_44, mul_43, convolution_17, unsqueeze_85, reciprocal_17, sqrt_17, add_35, unsqueeze_81, add_34, mul_40, unsqueeze_78, unsqueeze_82, add_31, reciprocal_13, add_32, var_mean_14, squeeze_71, unsqueeze_80, mul_44, unsqueeze_79, sub_14}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf95', layout=FixedLayout('cuda', torch.float32, size=(2, 512, 1, 1), stride=[512, 1, 512, 512]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf94, i1 + 512 * i0)),
    ranges=(2, 512, 1, 1),
    origins={relu_14}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf97,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 512, 1, 1]), stride=(512, 1, 1, 1)),
    inputs=[ComputedBuffer(name='buf95', layout=FixedLayout('cuda', torch.float32, size=(2, 512, 1, 1), stride=[512, 1, 512, 512]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf94, i1 + 512 * i0)),
      ranges=(2, 512, 1, 1),
      origins={relu_14}
    )), ComputedBuffer(name='buf96', layout=FixedLayout('cuda', torch.float32, size=[512, 512, 3, 3], stride=[512, 1, 262144, 786432]), data=Pointwise(
      'cuda',
      torch.float32,
      load(primals_55, i3 + 3 * i2 + 9 * i1 + 4608 * i0),
      ranges=[512, 512, 3, 3],
      origins={convolution_18, primals_55, relu_14}
    ))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={convolution_18, primals_55, relu_14}
  )
)), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          load(buf100, i1 + 32 * i0) / index_expr(16, torch.float32),
          ranges=[2, 32, 1, 1],
          origins={primals_55, relu_14, convolution_18, var_mean_18, view_36}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={primals_55, relu_14, squeeze_72, convolution_18, var_mean_18, view_36}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={squeeze_73, primals_55, squeeze_72, relu_14, convolution_18, var_mean_18, view_36}
  )
), TensorBox(
  View(
    View(
      StorageBox(
        Pointwise(
          'cuda',
          torch.float32,
          reciprocal(sqrt(load(buf99, i1 + 32 * i0) / index_expr(16, torch.float32) + constant(1e-05, torch.float32))),
          ranges=[2, 32, 1, 1],
          origins={primals_55, convolution_18, view_36, relu_14, var_mean_18, reciprocal_18, sqrt_18, add_43}
        )
      ),
      size=(2, 32, 1),
      reindex=lambda i0, i1, i2: [i0, i1, 0, 0],
      origins={primals_55, convolution_18, squeeze_74, view_36, relu_14, var_mean_18, reciprocal_18, sqrt_18, add_43}
    ),
    size=(2, 32),
    reindex=lambda i0, i1: [i0, i1, 0],
    origins={primals_55, squeeze_75, convolution_18, squeeze_74, view_36, relu_14, var_mean_18, reciprocal_18, sqrt_18, add_43}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf101', layout=FixedLayout('cuda', torch.float32, size=(2, 512, 1, 1), stride=[512, 1, 512, 512]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf97, 16 * ModularIndexing(i1, 16, 32) + 512 * i0 + ModularIndexing(i1, 1, 16)) - load(buf100, 32 * i0 + ModularIndexing(i1, 16, 32)) / index_expr(16, torch.float32) * reciprocal(sqrt(load(buf99, 32 * i0 + ModularIndexing(i1, 16, 32)) / index_expr(16, torch.float32) + constant(1e-05, torch.float32))) * load(primals_56, i1) + load(primals_57, i1)),
    ranges=(2, 512, 1, 1),
    origins={unsqueeze_110, mul_56, mul_55, view_37, sub_18, relu_14, unsqueeze_109, var_mean_18, unsqueeze_113, primals_57, add_43, relu_15, unsqueeze_108, primals_55, unsqueeze_112, convolution_18, view_36, add_44, primals_56, reciprocal_18, sqrt_18, unsqueeze_111}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf103,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 512, 1, 1]), stride=(512, 1, 1, 1)),
    inputs=[ComputedBuffer(name='buf101', layout=FixedLayout('cuda', torch.float32, size=(2, 512, 1, 1), stride=[512, 1, 512, 512]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf97, 16 * ModularIndexing(i1, 16, 32) + 512 * i0 + ModularIndexing(i1, 1, 16)) - load(buf100, 32 * i0 + ModularIndexing(i1, 16, 32)) / index_expr(16, torch.float32) * reciprocal(sqrt(load(buf99, 32 * i0 + ModularIndexing(i1, 16, 32)) / index_expr(16, torch.float32) + constant(1e-05, torch.float32))) * load(primals_56, i1) + load(primals_57, i1)),
      ranges=(2, 512, 1, 1),
      origins={unsqueeze_110, mul_56, mul_55, view_37, sub_18, relu_14, unsqueeze_109, var_mean_18, unsqueeze_113, primals_57, add_43, relu_15, unsqueeze_108, primals_55, unsqueeze_112, convolution_18, view_36, add_44, primals_56, reciprocal_18, sqrt_18, unsqueeze_111}
    )), ComputedBuffer(name='buf102', layout=FixedLayout('cuda', torch.float32, size=[512, 512, 3, 3], stride=[512, 1, 262144, 786432]), data=Pointwise(
      'cuda',
      torch.float32,
      load(primals_58, i3 + 3 * i2 + 9 * i1 + 4608 * i0),
      ranges=[512, 512, 3, 3],
      origins={unsqueeze_110, mul_56, view_37, sub_18, relu_14, unsqueeze_109, var_mean_18, primals_57, unsqueeze_113, primals_58, add_43, unsqueeze_108, relu_15, primals_55, unsqueeze_112, convolution_18, view_36, add_44, primals_56, mul_55, convolution_19, reciprocal_18, sqrt_18, unsqueeze_111}
    ))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={unsqueeze_110, mul_56, view_37, sub_18, relu_14, unsqueeze_109, var_mean_18, primals_57, unsqueeze_113, primals_58, add_43, unsqueeze_108, relu_15, primals_55, unsqueeze_112, convolution_18, view_36, add_44, primals_56, mul_55, convolution_19, reciprocal_18, sqrt_18, unsqueeze_111}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf107', layout=FixedLayout('cuda', torch.float32, size=[2, 32, 1, 1], stride=[32, 1, 64, 64]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf106, i1 + 32 * i0) / index_expr(16, torch.float32),
    ranges=[2, 32, 1, 1],
    origins={unsqueeze_110, mul_56, var_mean_19, view_37, sub_18, relu_14, unsqueeze_109, var_mean_18, primals_57, primals_58, unsqueeze_113, add_43, unsqueeze_108, relu_15, primals_55, unsqueeze_112, convolution_18, view_36, add_44, primals_56, view_38, mul_55, convolution_19, reciprocal_18, sqrt_18, unsqueeze_111}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf108', layout=FixedLayout('cuda', torch.float32, size=[2, 32, 1, 1], stride=[32, 1, 64, 64]), data=Pointwise(
    'cuda',
    torch.float32,
    reciprocal(sqrt(load(buf105, i1 + 32 * i0) / index_expr(16, torch.float32) + constant(1e-05, torch.float32))),
    ranges=[2, 32, 1, 1],
    origins={unsqueeze_110, mul_56, var_mean_19, mul_55, view_37, reciprocal_19, sqrt_19, sub_18, relu_14, unsqueeze_109, var_mean_18, unsqueeze_113, primals_58, primals_57, add_43, unsqueeze_108, relu_15, primals_55, unsqueeze_112, convolution_18, view_36, add_44, view_38, primals_56, convolution_19, add_45, reciprocal_18, sqrt_18, unsqueeze_111}
  ))
)), TensorBox(
  View(
    StorageBox(
      ComputedBuffer(name='buf110', layout=FixedLayout('cuda', torch.float32, size=[2, 512, 1, 1], stride=[512, 1, 1, 1]), data=Pointwise(
        'cuda',
        torch.float32,
        load(buf109, i1 + 512 * i0) / index_expr(1, torch.float32),
        ranges=[2, 512, 1, 1],
        origins={mul_56, unsqueeze_116, sub_19, add_46, view_37, add_47, sqrt_19, sub_18, unsqueeze_109, unsqueeze_118, var_mean_18, unsqueeze_113, primals_58, primals_57, unsqueeze_108, primals_60, convolution_18, unsqueeze_114, unsqueeze_119, primals_59, convolution_19, add_45, unsqueeze_111, unsqueeze_110, var_mean_19, mean, mul_55, reciprocal_19, relu_14, view_39, relu_16, add_43, relu_15, primals_55, unsqueeze_112, mul_58, mul_59, reciprocal_18, view_36, add_44, unsqueeze_117, view_38, primals_56, unsqueeze_115, sqrt_18}
      ))
    ),
    size=(2, 512),
    reindex=lambda i0, i1: [i0, i1, 0, 0],
    origins={mul_56, unsqueeze_116, sub_19, add_46, view_37, add_47, sqrt_19, sub_18, unsqueeze_109, unsqueeze_118, var_mean_18, primals_57, primals_58, unsqueeze_113, unsqueeze_108, primals_60, convolution_18, unsqueeze_114, unsqueeze_119, primals_59, convolution_19, add_45, view_40, unsqueeze_111, unsqueeze_110, var_mean_19, mean, mul_55, reciprocal_19, relu_14, view_39, relu_16, add_43, relu_15, primals_55, unsqueeze_112, mul_58, mul_59, reciprocal_18, view_36, add_44, unsqueeze_117, view_38, primals_56, unsqueeze_115, sqrt_18}
  )
), TensorBox(
  ReinterpretView(
    StorageBox(
      InputBuffer(name='primals_61', layout=FixedLayout('cuda', torch.float32, size=[1000, 512], stride=[512, 1]))
    ),
    FixedLayout('cuda', torch.float32, size=[1000, 512], stride=[512, 1]),
    no origins?
  )
), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf109, i1 + 512 * i0) <= constant(0, torch.float32),
    ranges=(2, 512, 1, 1),
    origins={mul_56, unsqueeze_116, sub_19, add_46, view_37, add_47, sqrt_19, sub_18, unsqueeze_109, unsqueeze_118, var_mean_18, unsqueeze_113, primals_58, primals_57, unsqueeze_108, primals_60, convolution_18, unsqueeze_114, unsqueeze_119, primals_59, convolution_19, add_45, unsqueeze_111, unsqueeze_110, var_mean_19, mul_55, reciprocal_19, relu_14, view_39, relu_16, add_43, relu_15, primals_55, unsqueeze_112, le, mul_58, mul_59, reciprocal_18, view_36, add_44, unsqueeze_117, view_38, primals_56, unsqueeze_115, sqrt_18}
  )
)), s0, 16, 16, 256, 8, 8, 64, 8, 8, 64, 8, 8, 64, 8, 8, 64, 4, 4, 16, 4, 4, 16, 4, 4, 16, 4, 4, 16, 4, 4, 16, 2, 2, 4, 2, 2, 4, 2, 2, 4, 2, 2, 4, 2, 2, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

While executing return [addmm, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_63, convolution, squeeze_1, squeeze_3, relu, getitem_2, getitem_3, convolution_1, squeeze_5, squeeze_7, relu_1, convolution_2, squeeze_9, squeeze_11, relu_2, convolution_3, squeeze_13, squeeze_15, relu_3, convolution_4, squeeze_17, squeeze_19, relu_4, convolution_5, squeeze_21, squeeze_23, relu_5, convolution_6, squeeze_25, squeeze_27, convolution_7, squeeze_29, squeeze_31, relu_6, convolution_8, squeeze_33, squeeze_35, relu_7, convolution_9, squeeze_37, squeeze_39, relu_8, convolution_10, squeeze_41, squeeze_43, relu_9, convolution_11, squeeze_45, squeeze_47, convolution_12, squeeze_49, squeeze_51, relu_10, convolution_13, squeeze_53, squeeze_55, relu_11, convolution_14, squeeze_57, squeeze_59, relu_12, convolution_15, squeeze_61, squeeze_63, relu_13, convolution_16, squeeze_65, squeeze_67, convolution_17, squeeze_69, squeeze_71, relu_14, convolution_18, squeeze_73, squeeze_75, relu_15, convolution_19, getitem_41, reciprocal_19, view_40, permute_1, le, sym_size, sym_size_1, sym_size_2, mul, sym_size_3, sym_size_4, mul_3, sym_size_5, sym_size_6, mul_6, sym_size_7, sym_size_8, mul_9, sym_size_9, sym_size_10, mul_12, sym_size_11, sym_size_12, mul_15, sym_size_13, sym_size_14, mul_18, sym_size_15, sym_size_16, mul_21, sym_size_17, sym_size_18, mul_24, sym_size_19, sym_size_20, mul_27, sym_size_21, sym_size_22, mul_30, sym_size_23, sym_size_24, mul_33, sym_size_25, sym_size_26, mul_36, sym_size_27, sym_size_28, mul_39, sym_size_29, sym_size_30, mul_42, sym_size_31, sym_size_32, mul_45, sym_size_33, sym_size_34, mul_48, sym_size_35, sym_size_36, mul_51, sym_size_37, sym_size_38, mul_54, sym_size_39, sym_size_40, mul_57]
ERROR:common:compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 472, in call_user_compiler
    assert callable(compiled_fn), "compiler_fn did not return callable"
AssertionError: compiler_fn did not return callable

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1679, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 386, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 432, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 475, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
cuda train functorch_dp_cifar10               FAIL
Running torchbench.py functorch_maml_omniglot...
cuda train functorch_maml_omniglot            PASS
Running torchbench.py hf_Albert...
cuda train hf_Albert                          PASS
Running torchbench.py hf_Bart...
ERROR:common:
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 287, in wrapper
    return inner_fn(self, inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 910, in CALL_FUNCTION
    self.call_function(fn, args, {})
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 395, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/user_defined.py", line 245, in call_function
    return VariableBuilder(tx, source).wrap_unspecialized_primitive(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/builder.py", line 620, in wrap_unspecialized_primitive
    unspec_var = wrap_fx_proxy_cls(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/builder.py", line 699, in wrap_fx_proxy_cls
    example_value = fake_wrapper(example_value)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 752, in wrap_to_fake_tensor_and_record
    if source and source.guard_source().is_nn_module():
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/source.py", line 43, in guard_source
    raise NotImplementedError()
NotImplementedError:

from user code:
   File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/bart/modeling_bart.py", line 827, in <graph break in forward>
    dropout_probability = random.uniform(0, 1)

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 41, in forward
    return self.model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/bart/modeling_bart.py", line 1353, in forward
    outputs = self.model(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/bart/modeling_bart.py", line 1222, in forward
    encoder_outputs = self.encoder(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/bart/modeling_bart.py", line 801, in forward
    embed_pos = self.embed_positions(input_shape)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 466, in _compile
    raise InternalTorchDynamoError() from e
torch._dynamo.exc.InternalTorchDynamoError
TorchDynamo optimized model failed to run because of following error
cuda train hf_Bart                            FAIL
Running torchbench.py hf_Bert...
cuda train hf_Bert                            PASS
Running torchbench.py hf_BigBird...
ERROR:common:Output 0 of CompiledFunctionBackward is a view and is being modified inplace. This view was created inside a custom Function (or because an input was returned as-is) and the autograd logic to handle view+inplace would override the custom backward associated with the custom Function, leading to incorrect gradients. This behavior is forbidden. You can fix this by cloning the output of the custom Function.
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/big_bird/modeling_big_bird.py", line 2462, in forward
    outputs = self.bert(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/big_bird/modeling_big_bird.py", line 2148, in forward
    encoder_outputs = self.encoder(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/big_bird/modeling_big_bird.py", line 1641, in forward
    layer_outputs = layer_module(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/big_bird/modeling_big_bird.py", line 1493, in forward
    self_attention_outputs = self.attention(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/big_bird/modeling_big_bird.py", line 1406, in forward
    self_outputs = self.self(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/big_bird/modeling_big_bird.py", line 475, in forward
    context_layer, attention_probs = self.bigbird_block_sparse_attention(
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/big_bird/modeling_big_bird.py", line 573, in bigbird_block_sparse_attention
    np.random.seed(seed)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/big_bird/modeling_big_bird.py", line 635, in <graph break in bigbird_block_sparse_attention>
    first_context_layer.unsqueeze_(2)
RuntimeError: Output 0 of CompiledFunctionBackward is a view and is being modified inplace. This view was created inside a custom Function (or because an input was returned as-is) and the autograd logic to handle view+inplace would override the custom backward associated with the custom Function, leading to incorrect gradients. This behavior is forbidden. You can fix this by cloning the output of the custom Function.
TorchDynamo optimized model failed to run because of following error
cuda train hf_BigBird                         FAIL
Running torchbench.py hf_DistilBert...
cuda train hf_DistilBert                      PASS
Running torchbench.py hf_GPT2...
[2022-11-19 14:50:40,264] torch._dynamo.optimizations.backends: [ERROR] aot_autograd error
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 53, in inner
    return fn(model, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 563, in aot_autograd
    return aot_module_simplified(subgraph.model, subgraph.example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1503, in aot_module_simplified
    aot_dispatcher_function = _create_aot_dispatcher_function(functional_call, joined_args, aot_config, fake_mode)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1191, in _create_aot_dispatcher_function
    aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 941, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args_with_views_handled)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 364, in fw_compiler
    return compile_fx_inner(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/debug_utils.py", line 473, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/debug.py", line 177, in inner
    return fn(*args, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 123, in compile_fx_inner
    compiled_fn = graph.compile_to_fn()
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 384, in compile_to_fn
    return self.compile_to_module().call
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 370, in compile_to_module
    code = self.codegen()
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 361, in codegen
    self.wrapper_code = WrapperCodeGen()
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/codegen/wrapper.py", line 242, in __init__
    V.graph.sizevars.codegen(self.prefix, V.graph.graph_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/sizevars.py", line 484, in codegen
    assert shape in added, f"{shape} is needed but not added"
AssertionError: s1 is needed but not added
ERROR:common:compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 472, in call_user_compiler
    assert callable(compiled_fn), "compiler_fn did not return callable"
AssertionError: compiler_fn did not return callable

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 1048, in forward
    transformer_outputs = self.transformer(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 793, in forward
    position_ids = torch.arange(past_length, input_shape[-1] + past_length, dtype=torch.long, device=device)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 891, in <graph break in forward>
    outputs = block(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 391, in forward
    attn_outputs = self.attn(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 332, in forward
    attn_output, attn_weights = self._attn(query, key, value, attention_mask, head_mask)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 472, in step
    self.output.compile_subgraph(self, partial_convert=True)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 386, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 432, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 475, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
cuda train hf_GPT2                            FAIL
Running torchbench.py hf_GPT2_large...
cuda train hf_GPT2_large                      PASS
Running torchbench.py hf_Longformer...
[2022-11-19 14:51:32,023] torch._inductor.ir: [WARNING] Using FallbackKernel: aten.cumsum
ERROR:common:Expected !is_symbolic() to be true, but got false.  (Could this error message be improved?  If so, please report an enhancement request to PyTorch.)
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/longformer/modeling_longformer.py", line 1813, in forward
    outputs = self.longformer(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/longformer/modeling_longformer.py", line 1696, in forward
    padding_len, input_ids, attention_mask, token_type_ids, position_ids, inputs_embeds = self._pad_to_window_size(
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/longformer/modeling_longformer.py", line 1715, in <graph break in forward>
    encoder_outputs = self.encoder(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/longformer/modeling_longformer.py", line 1265, in forward
    is_global_attn = is_index_global_attn.flatten().any().item()
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/longformer/modeling_longformer.py", line 1297, in <graph break in forward>
    layer_outputs = layer_module(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/longformer/modeling_longformer.py", line 1221, in forward
    self_attn_outputs = self.attention(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/longformer/modeling_longformer.py", line 1157, in forward
    self_outputs = self.self(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/longformer/modeling_longformer.py", line 542, in forward
    def forward(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1537, in forward
    return compiled_function(
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1507, in compiled_function
    return aot_dispatcher_function(args)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 570, in g
    return f(*args)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1125, in compiled_function
    fw_outs_including_aliases.append(input_alias.as_strided(out_tensor_meta.size(), out_tensor_meta.stride(), out_tensor_meta.storage_offset()))
RuntimeError: Expected !is_symbolic() to be true, but got false.  (Could this error message be improved?  If so, please report an enhancement request to PyTorch.)
TorchDynamo optimized model failed to run because of following error
cuda train hf_Longformer                      FAIL
Running torchbench.py hf_Reformer...
[2022-11-19 14:51:42,702] torch._dynamo.optimizations.backends: [ERROR] aot_autograd error
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 53, in inner
    return fn(model, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 563, in aot_autograd
    return aot_module_simplified(subgraph.model, subgraph.example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1503, in aot_module_simplified
    aot_dispatcher_function = _create_aot_dispatcher_function(functional_call, joined_args, aot_config, fake_mode)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1191, in _create_aot_dispatcher_function
    aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 851, in aot_dispatch_autograd
    _fw_metadata, out = run_functionalized_fw_and_collect_metadata(
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 225, in inner
    outs = f(*f_args)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 852, in <lambda>
    lambda *args: flat_fn(*(add_dupe_args(args))),
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1479, in functional_call
    out = Interpreter(mod).run(*args[params_len:], **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 243, in call_function
    return target(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/overrides.py", line 951, in lowmem_dropout
    result = LowmemDropout.apply(input, p)
RuntimeError: Cannot call sizes() on tensor with symbolic sizes/strides

While executing %lowmem_dropout : [#users=1] = call_function[target=torch._inductor.overrides.lowmem_dropout](args = (%self_word_embeddings : META IS MISSING, INVESTIGATE,), kwargs = {p: 0.05, training: True})
ERROR:common:compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 472, in call_user_compiler
    assert callable(compiled_fn), "compiler_fn did not return callable"
AssertionError: compiler_fn did not return callable

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py", line 2397, in forward
    reformer_outputs = self.reformer(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py", line 2063, in forward
    least_common_mult_chunk_length = _get_least_common_mult_chunk_len(self.config)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py", line 2100, in <graph break in forward>
    embedding_output = self.embeddings(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py", line 249, in forward
    position_ids = torch.arange(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1679, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 386, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 432, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 475, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
cuda train hf_Reformer                        FAIL
Running torchbench.py hf_T5...
WARNING:common:fp64 golden ref were not generated for hf_T5
ERROR:common:

from user code:
   File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 529, in <graph break in forward>
    scores += position_bias

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1087, in run_node
    return node.target(*args, **kwargs)
RuntimeError: Output 0 of AsStridedBackward0 is a view of a view which was created in no_grad mode and is being modified inplace with grad mode enabled. Given that this use case is ambiguous and error-prone, it is forbidden. You can clarify your code by moving both the view and the inplace either both inside the no_grad block (if you don't want the inplace to be tracked) or both outside (if you want the inplace to be tracked).

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1053, in get_fake_value
    return wrap_fake_exception(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 737, in wrap_fake_exception
    return fn()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1054, in <lambda>
    lambda: run_node(tx.output, node, args, kwargs, nnmodule)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1096, in run_node
    raise RuntimeError(
RuntimeError: Failed running call_function <built-in function iadd>(*(FakeTensor(FakeTensor(..., device='meta', size=(s2, s1, s0, s0),
           grad_fn=<AsStridedBackward0>), cuda:0), FakeTensor(FakeTensor(..., device='meta', size=(s2, s1, s0, s0), grad_fn=<AddBackward0>), cuda:0)), **{}):
Output 0 of AsStridedBackward0 is a view of a view which was created in no_grad mode and is being modified inplace with grad mode enabled. Given that this use case is ambiguous and error-prone, it is forbidden. You can clarify your code by moving both the view and the inplace either both inside the no_grad block (if you don't want the inplace to be tracked) or both outside (if you want the inplace to be tracked).
(scroll up for backtrace)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 41, in forward
    return self.model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 945, in forward
    attention_mask = torch.ones(batch_size, mask_seq_length).to(inputs_embeds.device)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in <graph break in forward>
    layer_outputs = layer_module(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 120, in impl
    self.push(fn_var.call_function(self, self.popn(nargs), {}))
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/builtin.py", line 320, in call_function
    return wrap_fx_proxy(tx, proxy, **options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/builder.py", line 650, in wrap_fx_proxy
    return wrap_fx_proxy_cls(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/builder.py", line 691, in wrap_fx_proxy_cls
    example_value = get_fake_value(proxy.node, tx)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1066, in get_fake_value
    raise TorchRuntimeError() from e
torch._dynamo.exc.TorchRuntimeError:

from user code:
   File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 529, in <graph break in forward>
    scores += position_bias

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
cuda train hf_T5                              FAIL
Running torchbench.py hf_T5_base...
WARNING:common:fp64 golden ref were not generated for hf_T5_base
ERROR:common:

from user code:
   File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 529, in <graph break in forward>
    scores += position_bias

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1087, in run_node
    return node.target(*args, **kwargs)
RuntimeError: Output 0 of AsStridedBackward0 is a view of a view which was created in no_grad mode and is being modified inplace with grad mode enabled. Given that this use case is ambiguous and error-prone, it is forbidden. You can clarify your code by moving both the view and the inplace either both inside the no_grad block (if you don't want the inplace to be tracked) or both outside (if you want the inplace to be tracked).

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1053, in get_fake_value
    return wrap_fake_exception(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 737, in wrap_fake_exception
    return fn()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1054, in <lambda>
    lambda: run_node(tx.output, node, args, kwargs, nnmodule)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1096, in run_node
    raise RuntimeError(
RuntimeError: Failed running call_function <built-in function iadd>(*(FakeTensor(FakeTensor(..., device='meta', size=(s2, s1, s0, s0),
           grad_fn=<AsStridedBackward0>), cuda:0), FakeTensor(FakeTensor(..., device='meta', size=(s2, s1, s0, s0), grad_fn=<AddBackward0>), cuda:0)), **{}):
Output 0 of AsStridedBackward0 is a view of a view which was created in no_grad mode and is being modified inplace with grad mode enabled. Given that this use case is ambiguous and error-prone, it is forbidden. You can clarify your code by moving both the view and the inplace either both inside the no_grad block (if you don't want the inplace to be tracked) or both outside (if you want the inplace to be tracked).
(scroll up for backtrace)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 41, in forward
    return self.model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
    encoder_outputs = self.encoder(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 945, in forward
    attention_mask = torch.ones(batch_size, mask_seq_length).to(inputs_embeds.device)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in <graph break in forward>
    layer_outputs = layer_module(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
    self_attention_outputs = self.layer[0](
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
    attention_output = self.SelfAttention(
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
    position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 120, in impl
    self.push(fn_var.call_function(self, self.popn(nargs), {}))
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/builtin.py", line 320, in call_function
    return wrap_fx_proxy(tx, proxy, **options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/builder.py", line 650, in wrap_fx_proxy
    return wrap_fx_proxy_cls(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/builder.py", line 691, in wrap_fx_proxy_cls
    example_value = get_fake_value(proxy.node, tx)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1066, in get_fake_value
    raise TorchRuntimeError() from e
torch._dynamo.exc.TorchRuntimeError:

from user code:
   File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 529, in <graph break in forward>
    scores += position_bias

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
cuda train hf_T5_base                         FAIL
Running torchbench.py hf_T5_large...
cuda train hf_T5_large                        PASS
Running torchbench.py lennard_jones...
cuda train lennard_jones                      PASS
Running torchbench.py maml_omniglot...
cuda train maml_omniglot                      PASS
Running torchbench.py mnasnet1_0...
cuda train mnasnet1_0                         PASS
Running torchbench.py mobilenet_v2...
cuda train mobilenet_v2                       PASS
Running torchbench.py mobilenet_v2_quantized_qat...
WARNING:common:fp64 golden ref were not generated for mobilenet_v2_quantized_qat
[2022-11-19 14:55:13,456] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,468] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,488] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,496] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,506] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,524] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,532] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,541] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,560] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,570] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,589] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,597] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,606] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,625] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,633] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,646] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,664] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,674] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,693] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,701] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,710] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,728] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,736] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,746] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,763] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,770] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,779] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,796] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,805] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,813] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,833] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,842] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,851] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,872] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,881] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,901] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,909] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,918] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,936] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,945] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,954] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,972] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,980] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:13,989] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,006] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,016] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,024] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,042] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,051] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,060] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,077] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,085] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,093] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,112] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,120] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,129] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,149] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,161] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,170] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,191] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,200] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,220] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,229] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,239] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,257] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,267] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,276] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,295] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,303] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,312] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,331] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,342] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,351] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,370] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,380] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,390] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,409] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,418] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,427] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,447] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,457] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,473] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,492] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,503] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,512] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,531] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,539] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,548] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,569] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,578] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,589] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,608] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,617] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,626] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,647] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,658] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,678] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,688] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,698] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,717] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,727] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,737] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,757] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,765] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,775] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,802] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,812] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,821] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,842] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,851] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,862] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,881] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,889] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,899] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,919] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,929] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,939] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,961] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,972] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:14,983] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,006] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,018] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,040] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,053] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,063] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,085] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,096] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,108] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,128] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,150] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,160] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,183] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,195] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,207] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,227] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,240] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,250] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,271] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,280] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,290] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,313] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,324] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,336] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,357] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,368] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,378] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,402] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,414] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,437] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,450] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,460] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,468] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,475] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,485] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 14:55:15,492] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
ERROR:common:expected size 32==0, stride 1==1 at dim=0
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/graph_module.py", line 660, in call_wrapped
    return self._wrapped_call(self, *args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/graph_module.py", line 279, in __call__
    raise e
  File "/scratch/ezyang/work/b/pytorch/torch/fx/graph_module.py", line 269, in __call__
    return super(self.cls, obj).__call__(*args, **kwargs)  # type: ignore[misc]
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "<eval_with_key>.8", line 4, in forward
    def forward(self, x : torch.Tensor) -> torch.Tensor:
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1537, in forward
    return compiled_function(
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1507, in compiled_function
    return aot_dispatcher_function(args)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 570, in g
    return f(*args)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1065, in compiled_function
    outs = CompiledFunction.apply(*no_dupe_args_with_synthetic_bases)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 977, in forward
    fw_outs = call_func_with_args(
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 595, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 194, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/ho/chovb3prfgw54v2xzxl5h4ouizsh56al4megifmzabjk2l5qpryi.py", line 3379, in call
    assert_size_stride(buf19, (0, ), (1, ))
AssertionError: expected size 32==0, stride 1==1 at dim=0
TorchDynamo optimized model failed to run because of following error
cuda train mobilenet_v2_quantized_qat         FAIL
Running torchbench.py mobilenet_v3_large...
cuda train mobilenet_v3_large                 PASS
Running torchbench.py moco...
ERROR:common:

from user code:
   File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/models/moco/moco/builder.py", line 172, in concat_all_gather
    torch.distributed.all_gather(tensors_gather, tensor, async_op=False)

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1087, in run_node
    return node.target(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/distributed/distributed_c10d.py", line 1346, in wrapper
    return func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/distributed/distributed_c10d.py", line 2341, in all_gather
    work = default_pg.allgather([tensor_list], [tensor])
  File "/scratch/ezyang/work/b/pytorch/torch/_subclasses/fake_tensor.py", line 875, in __torch_dispatch__
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_ops.py", line 297, in __call__
    return self._op(*args, **kwargs or {})
RuntimeError: Tensors must be CUDA and dense

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1053, in get_fake_value
    return wrap_fake_exception(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 737, in wrap_fake_exception
    return fn()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1054, in <lambda>
    lambda: run_node(tx.output, node, args, kwargs, nnmodule)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1096, in run_node
    raise RuntimeError(
RuntimeError: Failed running call_function <function all_gather at 0x7fca599627a0>(*([FakeTensor(FakeTensor(..., device='meta', size=(s0, s1, s2, s2)), cuda:0)], FakeTensor(FakeTensor(..., device='meta', size=(s0, s1, s2, s2)), cuda:0)), **{'async_op': False}):
Tensors must be CUDA and dense
(scroll up for backtrace)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/parallel/distributed.py", line 1098, in forward
    output = self._run_ddp_forward(*inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/parallel/distributed.py", line 1051, in _run_ddp_forward
    return module_to_run(*inputs[0], **kwargs[0])  # type: ignore[index]
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/models/moco/moco/builder.py", line 130, in forward
    self._momentum_update_key_encoder()  # update the key encoder
  File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/models/moco/moco/builder.py", line 133, in <graph break in forward>
    im_k, idx_unshuffle = self._batch_shuffle_ddp(im_k)
  File "/scratch/ezyang/work/b/pytorch/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/models/moco/moco/builder.py", line 76, in _batch_shuffle_ddp
    x_gather = concat_all_gather(x)
  File "/scratch/ezyang/work/b/pytorch/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 287, in wrapper
    return inner_fn(self, inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 959, in CALL_FUNCTION_KW
    self.call_function(fn, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 395, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/torch.py", line 417, in call_function
    tensor_variable = wrap_fx_proxy(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/builder.py", line 650, in wrap_fx_proxy
    return wrap_fx_proxy_cls(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/builder.py", line 691, in wrap_fx_proxy_cls
    example_value = get_fake_value(proxy.node, tx)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1066, in get_fake_value
    raise TorchRuntimeError() from e
torch._dynamo.exc.TorchRuntimeError:

from user code:
   File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/models/moco/moco/builder.py", line 172, in concat_all_gather
    torch.distributed.all_gather(tensors_gather, tensor, async_op=False)

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
cuda train moco                               FAIL
Running torchbench.py nvidia_deeprecommender...
[2022-11-19 14:56:53,549] torch._inductor.graph: [WARNING] Creating implicit fallback for:
  target: aten.expm1.default
  args[0]: TensorBox(StorageBox(
    Pointwise(
      'cuda',
      torch.float32,
      load(buf0, i1 + 512 * i0) * constant(1.0, torch.float32),
      ranges=[s0, 512],
      origins={permute, primals_1, primals_4, mul_1, primals_13, addmm}
    )
  ))
[2022-11-19 14:56:53,550] torch._inductor.lowering: [WARNING] make_fallback(aten.expm1.default): a decomposition exists, we should switch to it
[2022-11-19 14:56:53,556] torch._inductor.ir: [WARNING] Using FallbackKernel: torch.ops.aten.expm1.default
[2022-11-19 14:56:53,562] torch._inductor.ir: [WARNING] Using FallbackKernel: torch.ops.aten.expm1.default
[2022-11-19 14:56:53,569] torch._inductor.ir: [WARNING] Using FallbackKernel: torch.ops.aten.expm1.default
[2022-11-19 14:56:53,576] torch._inductor.ir: [WARNING] Using FallbackKernel: torch.ops.aten.expm1.default
[2022-11-19 14:56:53,583] torch._inductor.ir: [WARNING] Using FallbackKernel: torch.ops.aten.expm1.default
[2022-11-19 14:56:53,590] torch._inductor.ir: [WARNING] Using FallbackKernel: torch.ops.aten.expm1.default
[2022-11-19 14:56:54,128] torch._inductor.ir: [WARNING] Using FallbackKernel: torch.ops.aten.expm1.default
[2022-11-19 14:56:54,133] torch._inductor.ir: [WARNING] Using FallbackKernel: torch.ops.aten.expm1.default
[2022-11-19 14:56:54,139] torch._inductor.ir: [WARNING] Using FallbackKernel: torch.ops.aten.expm1.default
[2022-11-19 14:56:54,145] torch._inductor.ir: [WARNING] Using FallbackKernel: torch.ops.aten.expm1.default
[2022-11-19 14:56:54,150] torch._inductor.ir: [WARNING] Using FallbackKernel: torch.ops.aten.expm1.default
cuda train nvidia_deeprecommender             PASS
Running torchbench.py pytorch_CycleGAN_and_pix2pix...
[2022-11-19 14:57:16,778] torch._dynamo.optimizations.backends: [ERROR] aot_autograd error
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 53, in inner
    return fn(model, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 563, in aot_autograd
    return aot_module_simplified(subgraph.model, subgraph.example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1503, in aot_module_simplified
    aot_dispatcher_function = _create_aot_dispatcher_function(functional_call, joined_args, aot_config, fake_mode)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1191, in _create_aot_dispatcher_function
    aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 941, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args_with_views_handled)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 364, in fw_compiler
    return compile_fx_inner(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/debug_utils.py", line 473, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/debug.py", line 177, in inner
    return fn(*args, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 122, in compile_fx_inner
    graph.run(*example_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 140, in run
    return super().run(*args)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 321, in run_node
    result = super().run_node(n)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 282, in output
    assert all(
AssertionError: [TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    reciprocal(exp(load(buf170, i3 + 256 * i2 + 65536 * i1) + load(primals_48, i1) * constant(-2.0, torch.float32)) + constant(1.0, torch.float32)) * constant(2.0, torch.float32) - constant(1.0, torch.float32),
    ranges=torch.Size([1, 3, 256, 256]),
    origins={reciprocal_23, primals_47, mul_70, primals_48, add_55, exp, reflection_pad2d_19, mul_69, sub_23, convolution_23}
  )
)), TensorBox(StorageBox(
  InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[64, 3, 7, 7], stride=[147, 49, 7, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_3', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_5', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 3, 3], stride=[1152, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_9', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_11', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_15', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_17', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_21', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_23', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_27', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_29', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_33', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_35', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_39', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_41', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 3, 3], stride=[1152, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_45', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_47', layout=FixedLayout('cuda', torch.float32, size=[3, 64, 7, 7], stride=[3136, 49, 7, 1]))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf0', layout=FixedLayout('cuda', torch.float32, size=[1, 3, 262, 262], stride=[205932, 68644, 262, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_49, constant(255, torch.int32) - abs(constant(255, torch.int32) - abs(index_expr(i3, torch.int32) - constant(3, torch.int32))) + constant(255, torch.int32) - abs(constant(255, torch.int32) - abs(index_expr(i2, torch.int32) - constant(3, torch.int32))) * s1 + i1 * s1**2),
    ranges=[1, 3, 262, 262],
    origins={reflection_pad2d}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf2', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 64, 256, 256]), stride=[4194304, 65536, 256, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf1, i3 + 256 * i2 + 65536 * i1) + load(primals_2, i1),
    ranges=torch.Size([1, 64, 256, 256]),
    origins={convolution, primals_2, primals_1, reflection_pad2d}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf6, i1) / index_expr(65536, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 64, 1, 1],
            origins={primals_2, add, var, convolution, reciprocal, view, primals_1, reflection_pad2d, sqrt}
          )
        ),
        size=(1, 64, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={primals_2, add, var, convolution, reciprocal, view, primals_1, reflection_pad2d, squeeze_3, sqrt}
      ),
      size=(1, 64),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={primals_2, add, var, sqrt, convolution, reciprocal, view, primals_1, reflection_pad2d, squeeze_3, squeeze_4}
    ),
    size=(64,),
    reindex=lambda i0: [0, i0],
    origins={squeeze_5, primals_2, add, var, sqrt, convolution, reciprocal, view, primals_1, reflection_pad2d, squeeze_3, squeeze_4}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf9', layout=FixedLayout('cuda', torch.float32, size=torch.Size([1, 64, 256, 256]), stride=[4194304, 65536, 256, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf2, i3 + 256 * i2 + 65536 * i1) - load(buf8, i1) / index_expr(65536, torch.float32) * reciprocal(sqrt(load(buf6, i1) / index_expr(65536, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
    ranges=torch.Size([1, 64, 256, 256]),
    origins={unsqueeze_3, full, mul_1, primals_2, add_1, add, unsqueeze_1, unsqueeze_2, unsqueeze, primals_1, view, reflection_pad2d, sqrt, new_zeros, sub, mean, var, relu, convolution, reciprocal, view_1, mul_2, unsqueeze_5, unsqueeze_4}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf11', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 128, 128, 128]), stride=[2097152, 16384, 128, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf10, i3 + 128 * i2 + 16384 * i1) + load(primals_4, i1),
    ranges=torch.Size([1, 128, 128, 128]),
    origins={unsqueeze_3, full, mul_1, primals_2, add_1, add, unsqueeze_1, unsqueeze_2, unsqueeze, primals_1, view, reflection_pad2d, primals_3, sqrt, new_zeros, sub, mean, view_2, var, view_3, relu, convolution, reciprocal, view_1, primals_4, mul_2, convolution_1, unsqueeze_5, unsqueeze_4}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf15, i1) / index_expr(16384, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 128, 1, 1],
            origins={reciprocal_1, mul_1, add, primals_1, new_zeros, relu, convolution, reciprocal, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, primals_2, add_1, unsqueeze_1, unsqueeze_2, unsqueeze, view, reflection_pad2d, primals_3, sqrt, sub, mean, view_5, view_2, var, view_3, view_1, primals_4, var_1, convolution_1, add_2, sqrt_1}
          )
        ),
        size=(1, 128, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={reciprocal_1, mul_1, add, primals_1, new_zeros, relu, convolution, reciprocal, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, primals_2, add_1, unsqueeze_1, unsqueeze_2, unsqueeze, view, reflection_pad2d, primals_3, sqrt, sub, mean, view_5, view_2, var, view_3, view_1, squeeze_9, primals_4, var_1, convolution_1, add_2, sqrt_1}
      ),
      size=(1, 128),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={reciprocal_1, mul_1, add, primals_1, new_zeros, relu, convolution, reciprocal, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, primals_2, add_1, unsqueeze_1, unsqueeze_2, unsqueeze, view, reflection_pad2d, primals_3, sqrt, sub, mean, view_5, squeeze_10, view_2, var, view_3, view_1, squeeze_9, primals_4, var_1, convolution_1, add_2, sqrt_1}
    ),
    size=(128,),
    reindex=lambda i0: [0, i0],
    origins={reciprocal_1, mul_1, add, primals_1, new_zeros, relu, convolution, reciprocal, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, primals_2, add_1, unsqueeze_1, unsqueeze_2, unsqueeze, squeeze_11, view, reflection_pad2d, primals_3, sqrt, sub, mean, view_5, squeeze_10, view_2, var, view_3, view_1, squeeze_9, primals_4, var_1, convolution_1, add_2, sqrt_1}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf18', layout=FixedLayout('cuda', torch.float32, size=torch.Size([1, 128, 128, 128]), stride=[2097152, 16384, 128, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf11, i3 + 128 * i2 + 16384 * i1) - load(buf17, i1) / index_expr(16384, torch.float32) * reciprocal(sqrt(load(buf15, i1) / index_expr(16384, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
    ranges=torch.Size([1, 128, 128, 128]),
    origins={reciprocal_1, mul_1, sub_1, add, primals_1, new_zeros, view_6, relu, convolution, reciprocal, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, unsqueeze_9, primals_2, add_1, unsqueeze_1, relu_1, unsqueeze_2, unsqueeze, view, add_3, mul_5, reflection_pad2d, primals_3, sqrt, unsqueeze_11, sub, unsqueeze_10, new_zeros_1, mean, view_5, view_2, var, view_3, view_1, primals_4, mul_4, var_1, convolution_1, add_2, mean_1, sqrt_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf20', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf19, i3 + 64 * i2 + 4096 * i1) + load(primals_6, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reciprocal_1, mul_1, sub_1, add, primals_1, primals_6, primals_5, new_zeros, view_7, view_8, view_6, relu, convolution, reciprocal, convolution_2, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, unsqueeze_9, primals_2, add_1, unsqueeze_1, relu_1, unsqueeze_2, unsqueeze, view, add_3, mul_5, reflection_pad2d, primals_3, sqrt, unsqueeze_11, sub, unsqueeze_10, new_zeros_1, mean, view_5, view_2, var, view_3, view_1, primals_4, mul_4, var_1, convolution_1, add_2, mean_1, sqrt_1}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf22, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={add_4, reciprocal_1, mul_1, sub_1, add, primals_1, primals_6, primals_5, reciprocal_2, new_zeros, view_10, view_7, view_8, view_6, relu, convolution, reciprocal, convolution_2, mul_2, unsqueeze_5, unsqueeze_4, var_2, unsqueeze_3, sqrt_2, full, unsqueeze_9, primals_2, add_1, unsqueeze_1, relu_1, unsqueeze_2, unsqueeze, view, add_3, mul_5, reflection_pad2d, primals_3, sqrt, unsqueeze_11, sub, unsqueeze_10, new_zeros_1, mean, view_5, view_2, var, view_3, view_1, primals_4, mul_4, var_1, convolution_1, add_2, mean_1, sqrt_1}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={add_4, reciprocal_1, mul_1, sub_1, add, primals_1, primals_6, primals_5, reciprocal_2, squeeze_15, new_zeros, view_10, view_7, view_8, view_6, relu, convolution, reciprocal, convolution_2, mul_2, unsqueeze_5, unsqueeze_4, var_2, unsqueeze_3, sqrt_2, full, unsqueeze_9, primals_2, add_1, unsqueeze_1, relu_1, unsqueeze_2, unsqueeze, view, add_3, mul_5, reflection_pad2d, primals_3, sqrt, unsqueeze_11, sub, unsqueeze_10, new_zeros_1, mean, view_5, view_2, var, view_3, view_1, primals_4, mul_4, var_1, convolution_1, add_2, mean_1, sqrt_1}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={add_4, reciprocal_1, mul_1, sub_1, squeeze_16, add, primals_1, primals_6, primals_5, reciprocal_2, squeeze_15, new_zeros, view_10, view_7, view_8, view_6, relu, convolution, reciprocal, convolution_2, mul_2, unsqueeze_5, unsqueeze_4, var_2, unsqueeze_3, sqrt_2, full, unsqueeze_9, primals_2, add_1, unsqueeze_1, relu_1, unsqueeze_2, unsqueeze, view, add_3, mul_5, reflection_pad2d, primals_3, sqrt, unsqueeze_11, sub, unsqueeze_10, new_zeros_1, mean, view_5, view_2, var, view_3, view_1, primals_4, mul_4, var_1, convolution_1, add_2, mean_1, sqrt_1}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={add_4, reciprocal_1, mul_1, sub_1, squeeze_16, add, primals_1, primals_6, primals_5, reciprocal_2, squeeze_15, new_zeros, view_10, view_7, view_8, view_6, relu, squeeze_17, convolution, reciprocal, convolution_2, mul_2, unsqueeze_5, unsqueeze_4, var_2, unsqueeze_3, sqrt_2, full, unsqueeze_9, primals_2, add_1, unsqueeze_1, relu_1, unsqueeze_2, unsqueeze, view, add_3, mul_5, reflection_pad2d, primals_3, sqrt, unsqueeze_11, sub, unsqueeze_10, new_zeros_1, mean, view_5, view_2, var, view_3, view_1, primals_4, mul_4, var_1, convolution_1, add_2, mean_1, sqrt_1}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf24', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf20, i3 + 64 * i2 + 4096 * i1) - load(buf23, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf22, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reciprocal_1, sub_1, primals_6, sub_2, primals_5, reciprocal_2, unsqueeze_16, view_10, view_7, view_8, view_6, convolution, convolution_2, var_2, sqrt_2, unsqueeze_9, primals_2, relu_1, unsqueeze_2, view, add_3, mul_5, reflection_pad2d, primals_3, sqrt, unsqueeze_11, sub, new_zeros_1, mean, var, mul_4, var_1, add_2, mean_1, add_4, mul_1, add, primals_1, new_zeros_2, new_zeros, unsqueeze_17, mul_7, relu_2, relu, reciprocal, add_5, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, mul_8, full, add_1, unsqueeze_1, unsqueeze, unsqueeze_15, mean_2, view_11, unsqueeze_10, view_5, view_2, view_3, view_1, primals_4, convolution_1, sqrt_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf25', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf24, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf27', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf26, i3 + 64 * i2 + 4096 * i1) + load(primals_8, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={primals_7, convolution_3, primals_8, reflection_pad2d_1}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf29, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={sqrt_3, primals_8, add_6, reflection_pad2d_1, reciprocal_3, primals_7, convolution_3, var_3, view_15}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={sqrt_3, primals_8, add_6, reflection_pad2d_1, reciprocal_3, primals_7, convolution_3, squeeze_21, var_3, view_15}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={sqrt_3, primals_8, add_6, reflection_pad2d_1, reciprocal_3, primals_7, convolution_3, squeeze_21, squeeze_22, var_3, view_15}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={sqrt_3, primals_8, add_6, reflection_pad2d_1, reciprocal_3, primals_7, convolution_3, squeeze_21, squeeze_22, squeeze_23, var_3, view_15}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf31', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf27, i3 + 64 * i2 + 4096 * i1) - load(buf30, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf29, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={unsqueeze_22, full, primals_7, convolution_3, unsqueeze_1, unsqueeze_2, unsqueeze, sub_3, var_3, unsqueeze_21, view_15, sqrt_3, primals_8, add_6, reflection_pad2d_1, new_zeros_3, reciprocal_3, relu_3, mul_10, add_7, mean_3, mul_11, unsqueeze_23, view_16}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf32', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf31, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_2}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf34', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf33, i3 + 64 * i2 + 4096 * i1) + load(primals_10, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={primals_10, reflection_pad2d_2, convolution_4, primals_9}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf36, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={var_4, reciprocal_4, convolution_4, primals_10, primals_9, reflection_pad2d_2, view_20, sqrt_4, add_8}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={var_4, squeeze_27, reciprocal_4, convolution_4, primals_10, primals_9, reflection_pad2d_2, view_20, sqrt_4, add_8}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={var_4, squeeze_27, reciprocal_4, squeeze_28, convolution_4, primals_10, primals_9, reflection_pad2d_2, view_20, sqrt_4, add_8}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={var_4, squeeze_27, reciprocal_4, convolution_4, squeeze_28, reflection_pad2d_2, primals_10, primals_9, squeeze_29, view_20, sqrt_4, add_8}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf38', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf24, i3 + 64 * i2 + 4096 * i1) + load(buf34, i3 + 64 * i2 + 4096 * i1) - load(buf37, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf36, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reciprocal_1, view_12, sub_1, primals_6, sub_2, primals_5, reciprocal_2, unsqueeze_16, sqrt_4, view_10, view_7, mean_4, reciprocal_4, view_8, view_6, convolution, primals_10, convolution_2, primals_9, reflection_pad2d_2, var_2, var_4, sqrt_2, convolution_4, unsqueeze_9, primals_2, relu_1, unsqueeze_2, view, add_3, mul_5, reflection_pad2d, primals_3, sqrt, mul_13, unsqueeze_11, sub, new_zeros_1, mean, var, unsqueeze_28, mul_4, var_1, add_2, mean_1, add_8, add_4, mul_1, add, new_zeros_4, add_9, primals_1, mul_14, unsqueeze_29, add_10, new_zeros_2, new_zeros, unsqueeze_17, mul_7, unsqueeze_27, relu_2, relu, reciprocal, add_5, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, mul_8, full, view_13, add_1, unsqueeze_1, view_21, unsqueeze, unsqueeze_15, mean_2, view_11, view_20, unsqueeze_10, view_5, view_2, view_3, view_1, primals_4, sub_4, convolution_1, sqrt_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf39', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf38, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_3}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf41', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf40, i3 + 64 * i2 + 4096 * i1) + load(primals_12, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={primals_11, convolution_5, primals_12, reflection_pad2d_3}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf43, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={convolution_5, primals_11, reflection_pad2d_3, reciprocal_5, sqrt_5, view_22, var_5, add_11, primals_12}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={convolution_5, primals_11, reflection_pad2d_3, reciprocal_5, sqrt_5, squeeze_33, view_22, var_5, add_11, primals_12}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={convolution_5, primals_11, reflection_pad2d_3, reciprocal_5, sqrt_5, squeeze_34, squeeze_33, view_22, var_5, add_11, primals_12}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={convolution_5, primals_11, reflection_pad2d_3, reciprocal_5, sqrt_5, squeeze_34, squeeze_33, view_22, squeeze_35, var_5, add_11, primals_12}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf45', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf41, i3 + 64 * i2 + 4096 * i1) - load(buf44, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf43, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={unsqueeze_35, full, reflection_pad2d_3, unsqueeze_1, unsqueeze_2, unsqueeze, relu_4, add_12, view_22, mul_17, mean_5, sub_5, new_zeros_5, mul_16, convolution_5, add_11, reciprocal_5, view_23, sqrt_5, var_5, unsqueeze_34, primals_11, primals_12, unsqueeze_33}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf46', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf45, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_4}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf48', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf47, i3 + 64 * i2 + 4096 * i1) + load(primals_14, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={primals_14, primals_13, reflection_pad2d_4, convolution_6}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf50, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={var_6, primals_13, sqrt_6, reflection_pad2d_4, convolution_6, primals_14, add_13, reciprocal_6, view_27}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={var_6, primals_13, sqrt_6, reflection_pad2d_4, convolution_6, primals_14, add_13, squeeze_39, reciprocal_6, view_27}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={var_6, primals_13, sqrt_6, reflection_pad2d_4, convolution_6, primals_14, add_13, squeeze_39, reciprocal_6, squeeze_40, view_27}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={var_6, primals_13, sqrt_6, reflection_pad2d_4, squeeze_41, convolution_6, primals_14, add_13, squeeze_39, reciprocal_6, squeeze_40, view_27}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf52', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf38, i3 + 64 * i2 + 4096 * i1) + load(buf48, i3 + 64 * i2 + 4096 * i1) - load(buf51, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf50, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reciprocal_1, mean_6, view_12, sub_1, reflection_pad2d_4, primals_6, sub_2, primals_5, reciprocal_2, unsqueeze_16, reciprocal_6, view_10, sqrt_4, view_27, view_7, mean_4, reciprocal_4, view_8, view_6, convolution_6, convolution, primals_10, convolution_2, primals_9, sub_6, reflection_pad2d_2, var_2, var_6, sqrt_2, var_4, sqrt_6, unsqueeze_9, convolution_4, primals_2, unsqueeze_40, relu_1, unsqueeze_2, view, primals_14, add_3, mul_5, reflection_pad2d, primals_3, sqrt, mul_13, unsqueeze_11, sub, new_zeros_1, mean, primals_13, mul_19, var, unsqueeze_28, mul_4, var_1, add_13, add_2, mean_1, add_8, add_4, new_zeros_6, add_14, mul_1, mul_20, add, unsqueeze_41, new_zeros_4, add_9, primals_1, add_15, mul_14, unsqueeze_29, add_10, new_zeros_2, new_zeros, unsqueeze_17, mul_7, unsqueeze_27, relu_2, relu, unsqueeze_39, reciprocal, add_5, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, mul_8, full, view_13, add_1, unsqueeze_1, view_21, unsqueeze, unsqueeze_15, mean_2, view_11, view_28, view_20, unsqueeze_10, view_5, view_2, view_3, view_1, primals_4, sub_4, convolution_1, sqrt_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf53', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf52, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_5}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf55', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf54, i3 + 64 * i2 + 4096 * i1) + load(primals_16, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={primals_16, primals_15, convolution_7, reflection_pad2d_5}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf57, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={reciprocal_7, var_7, reflection_pad2d_5, primals_16, sqrt_7, primals_15, view_29, add_16, convolution_7}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={reciprocal_7, var_7, reflection_pad2d_5, squeeze_45, primals_16, sqrt_7, primals_15, view_29, add_16, convolution_7}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={reciprocal_7, var_7, reflection_pad2d_5, squeeze_45, primals_16, sqrt_7, primals_15, squeeze_46, view_29, add_16, convolution_7}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={reciprocal_7, var_7, reflection_pad2d_5, squeeze_45, primals_16, sqrt_7, primals_15, squeeze_47, squeeze_46, view_29, add_16, convolution_7}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf59', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf55, i3 + 64 * i2 + 4096 * i1) - load(buf58, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf57, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={full, unsqueeze_46, reflection_pad2d_5, primals_16, primals_15, unsqueeze_1, unsqueeze_2, unsqueeze, sub_7, view_29, mul_22, add_16, view_30, convolution_7, unsqueeze_45, reciprocal_7, mean_7, new_zeros_7, var_7, relu_5, sqrt_7, add_17, mul_23, unsqueeze_47}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf60', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf59, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_6}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf62', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf61, i3 + 64 * i2 + 4096 * i1) + load(primals_18, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={convolution_8, reflection_pad2d_6, primals_18, primals_17}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf64, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={var_8, reflection_pad2d_6, convolution_8, primals_17, view_34, primals_18, sqrt_8, add_18, reciprocal_8}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={var_8, reflection_pad2d_6, convolution_8, view_34, primals_18, sqrt_8, add_18, squeeze_51, primals_17, reciprocal_8}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={squeeze_52, var_8, reflection_pad2d_6, convolution_8, view_34, primals_18, sqrt_8, add_18, squeeze_51, primals_17, reciprocal_8}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={squeeze_52, var_8, reflection_pad2d_6, convolution_8, view_34, squeeze_53, primals_18, sqrt_8, add_18, squeeze_51, primals_17, reciprocal_8}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf66', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf52, i3 + 64 * i2 + 4096 * i1) + load(buf62, i3 + 64 * i2 + 4096 * i1) - load(buf65, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf64, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reciprocal_1, mean_6, sub_1, reflection_pad2d_4, primals_6, sub_2, view_34, primals_5, reciprocal_2, unsqueeze_16, reciprocal_6, view_10, view_27, view_7, mean_4, view_8, view_6, convolution_6, primals_10, convolution_2, primals_9, sub_6, var_6, sqrt_2, sqrt_6, unsqueeze_9, unsqueeze_40, relu_1, primals_14, add_3, mul_5, unsqueeze_11, primals_17, new_zeros_1, primals_13, mul_19, primals_18, mul_4, add_13, add_2, add_4, new_zeros_6, add_14, mul_20, unsqueeze_41, add_15, new_zeros_2, unsqueeze_17, mul_7, relu_2, unsqueeze_39, add_5, mean_8, mul_8, view_13, sub_8, unsqueeze_15, view_11, view_28, sqrt_8, convolution_8, sub_4, unsqueeze_52, view_12, sqrt_4, reciprocal_8, reciprocal_4, convolution, reflection_pad2d_2, reflection_pad2d_6, var_2, var_4, var_8, convolution_4, primals_2, unsqueeze_2, mul_25, view, reflection_pad2d, primals_3, sqrt, mul_13, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_53, unsqueeze_28, add_20, var_1, mean_1, add_8, mul_1, unsqueeze_51, add, new_zeros_4, add_9, primals_1, mul_14, unsqueeze_29, view_35, add_10, new_zeros, unsqueeze_27, relu, reciprocal, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, view_21, unsqueeze, mean_2, view_20, unsqueeze_10, view_5, view_2, view_3, view_1, primals_4, convolution_1, sqrt_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf67', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf66, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_7}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf69', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf68, i3 + 64 * i2 + 4096 * i1) + load(primals_20, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={convolution_9, reflection_pad2d_7, primals_20, primals_19}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf71, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={convolution_9, add_21, var_9, primals_19, sqrt_9, reciprocal_9, reflection_pad2d_7, primals_20, view_36}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={convolution_9, add_21, squeeze_57, var_9, primals_19, sqrt_9, reciprocal_9, reflection_pad2d_7, primals_20, view_36}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={squeeze_58, convolution_9, add_21, squeeze_57, var_9, primals_19, sqrt_9, reciprocal_9, reflection_pad2d_7, primals_20, view_36}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={squeeze_58, convolution_9, add_21, squeeze_57, squeeze_59, var_9, primals_19, sqrt_9, reciprocal_9, reflection_pad2d_7, primals_20, view_36}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf73', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf69, i3 + 64 * i2 + 4096 * i1) - load(buf72, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf71, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={convolution_9, full, unsqueeze_58, view_37, unsqueeze_1, var_9, unsqueeze_2, unsqueeze, sqrt_9, unsqueeze_59, reciprocal_9, unsqueeze_57, reflection_pad2d_7, relu_6, add_22, new_zeros_9, add_21, primals_19, mul_28, mul_29, mean_9, primals_20, sub_9, view_36}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf74', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf73, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_8}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf76', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf75, i3 + 64 * i2 + 4096 * i1) + load(primals_22, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reflection_pad2d_8, primals_21, primals_22, convolution_10}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf78, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={primals_22, reflection_pad2d_8, convolution_10, add_23, view_41, reciprocal_10, primals_21, sqrt_10, var_10}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={reflection_pad2d_8, primals_22, convolution_10, squeeze_63, view_41, add_23, reciprocal_10, primals_21, sqrt_10, var_10}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={primals_22, reflection_pad2d_8, reciprocal_10, convolution_10, squeeze_63, view_41, add_23, squeeze_64, primals_21, sqrt_10, var_10}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={reflection_pad2d_8, primals_22, reciprocal_10, squeeze_65, convolution_10, squeeze_63, view_41, add_23, squeeze_64, primals_21, sqrt_10, var_10}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf80', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf66, i3 + 64 * i2 + 4096 * i1) + load(buf76, i3 + 64 * i2 + 4096 * i1) - load(buf79, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf78, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reciprocal_1, mean_6, sub_1, reflection_pad2d_4, convolution_10, primals_6, sub_2, view_34, primals_5, reciprocal_2, unsqueeze_16, sqrt_10, reciprocal_6, view_10, var_10, view_27, view_7, unsqueeze_64, mean_4, view_8, view_6, convolution_6, primals_10, convolution_2, add_23, primals_9, sub_6, var_6, sqrt_2, mul_31, sqrt_6, unsqueeze_9, unsqueeze_40, relu_1, primals_14, add_3, mul_5, unsqueeze_63, unsqueeze_11, primals_17, new_zeros_1, primals_13, mul_19, new_zeros_10, add_25, add_24, primals_18, mul_4, mul_32, add_13, add_2, unsqueeze_65, primals_21, add_4, new_zeros_6, view_42, add_14, mul_20, unsqueeze_41, add_15, new_zeros_2, unsqueeze_17, mul_7, relu_2, unsqueeze_39, view_41, add_5, mean_8, mul_8, primals_22, view_13, sub_8, unsqueeze_15, view_11, view_28, sqrt_8, convolution_8, sub_4, unsqueeze_52, view_12, sqrt_4, reciprocal_8, reciprocal_4, convolution, reflection_pad2d_2, reflection_pad2d_6, var_2, var_4, var_8, convolution_4, primals_2, unsqueeze_2, mul_25, view, reflection_pad2d, primals_3, sqrt, mul_13, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_53, unsqueeze_28, add_20, var_1, mean_1, add_8, mul_1, unsqueeze_51, add, new_zeros_4, add_9, primals_1, mul_14, unsqueeze_29, mean_10, view_35, add_10, new_zeros, unsqueeze_27, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, view_21, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, view_3, view_1, primals_4, convolution_1, sqrt_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf81', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf80, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_9}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf83', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf82, i3 + 64 * i2 + 4096 * i1) + load(primals_24, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reflection_pad2d_9, convolution_11, primals_23, primals_24}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf85, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={view_43, primals_24, add_26, reflection_pad2d_9, convolution_11, sqrt_11, reciprocal_11, primals_23, var_11}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={view_43, primals_24, squeeze_69, add_26, reflection_pad2d_9, convolution_11, sqrt_11, reciprocal_11, primals_23, var_11}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={squeeze_70, view_43, primals_24, squeeze_69, add_26, reflection_pad2d_9, convolution_11, sqrt_11, reciprocal_11, primals_23, var_11}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={view_43, squeeze_70, primals_24, squeeze_69, add_26, reflection_pad2d_9, squeeze_71, convolution_11, sqrt_11, reciprocal_11, primals_23, var_11}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf87', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf83, i3 + 64 * i2 + 4096 * i1) - load(buf86, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf85, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={unsqueeze_71, full, primals_24, add_26, unsqueeze_1, unsqueeze_2, unsqueeze, convolution_11, view_44, mul_34, var_11, view_43, sub_11, mean_11, unsqueeze_69, reflection_pad2d_9, unsqueeze_70, new_zeros_11, relu_7, add_27, sqrt_11, mul_35, reciprocal_11, primals_23}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf88', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf87, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_10}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf90', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf89, i3 + 64 * i2 + 4096 * i1) + load(primals_26, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reflection_pad2d_10, primals_25, convolution_12, primals_26}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf92, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={add_28, primals_25, sqrt_12, view_48, var_12, reflection_pad2d_10, convolution_12, primals_26, reciprocal_12}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={add_28, primals_25, sqrt_12, view_48, var_12, reflection_pad2d_10, squeeze_75, convolution_12, primals_26, reciprocal_12}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={add_28, primals_25, sqrt_12, view_48, squeeze_76, var_12, reflection_pad2d_10, squeeze_75, convolution_12, primals_26, reciprocal_12}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={add_28, primals_25, sqrt_12, view_48, squeeze_76, var_12, reflection_pad2d_10, squeeze_75, squeeze_77, convolution_12, primals_26, reciprocal_12}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf94', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf80, i3 + 64 * i2 + 4096 * i1) + load(buf90, i3 + 64 * i2 + 4096 * i1) - load(buf93, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf92, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reciprocal_1, mean_6, sub_1, reflection_pad2d_4, convolution_10, primals_6, sub_2, view_34, primals_5, reciprocal_2, sqrt_10, unsqueeze_16, reciprocal_6, var_10, view_10, view_27, view_7, unsqueeze_64, mean_4, view_8, view_6, convolution_6, primals_10, convolution_2, add_23, primals_9, sub_6, var_6, sqrt_2, mul_31, sqrt_6, unsqueeze_9, unsqueeze_40, relu_1, primals_14, add_3, mul_5, unsqueeze_63, unsqueeze_11, primals_17, new_zeros_1, primals_13, mul_19, new_zeros_10, add_25, add_24, primals_18, mul_4, mul_32, add_13, unsqueeze_65, add_2, primals_21, add_4, mean_12, new_zeros_6, view_42, add_14, mul_20, unsqueeze_41, var_12, add_15, new_zeros_2, convolution_12, unsqueeze_17, mul_7, sqrt_12, relu_2, unsqueeze_39, reflection_pad2d_10, view_41, add_5, mean_8, reciprocal_12, mul_8, primals_22, view_13, sub_8, unsqueeze_15, view_11, view_28, sqrt_8, primals_26, primals_25, sub_12, convolution_8, sub_4, unsqueeze_76, unsqueeze_52, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_8, reciprocal_4, mul_37, convolution, reflection_pad2d_2, reflection_pad2d_6, var_2, var_4, var_8, convolution_4, primals_2, unsqueeze_2, mul_25, view, new_zeros_12, reflection_pad2d, add_30, primals_3, sqrt, mul_13, add_18, sub, unsqueeze_75, view_49, mean, new_zeros_8, add_19, add_29, var, mul_26, unsqueeze_53, unsqueeze_28, add_20, var_1, mean_1, add_8, view_48, mul_1, unsqueeze_51, add, new_zeros_4, add_9, primals_1, mul_14, unsqueeze_29, mean_10, view_35, add_10, new_zeros, unsqueeze_27, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, view_21, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, view_3, view_1, primals_4, convolution_1, sqrt_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf95', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf94, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_11}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf97', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf96, i3 + 64 * i2 + 4096 * i1) + load(primals_28, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={primals_28, convolution_13, primals_27, reflection_pad2d_11}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf99, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={reciprocal_13, convolution_13, var_13, sqrt_13, view_50, primals_28, add_31, primals_27, reflection_pad2d_11}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={reciprocal_13, convolution_13, var_13, sqrt_13, view_50, squeeze_81, primals_28, add_31, primals_27, reflection_pad2d_11}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={reciprocal_13, convolution_13, var_13, sqrt_13, squeeze_82, view_50, squeeze_81, primals_28, add_31, primals_27, reflection_pad2d_11}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={reciprocal_13, convolution_13, var_13, sqrt_13, squeeze_83, squeeze_82, view_50, squeeze_81, primals_28, add_31, primals_27, reflection_pad2d_11}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf101', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf97, i3 + 64 * i2 + 4096 * i1) - load(buf100, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf99, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reciprocal_13, full, var_13, view_51, sqrt_13, unsqueeze_1, unsqueeze_2, view_50, primals_28, relu_8, unsqueeze, sub_13, primals_27, add_32, mul_41, unsqueeze_83, new_zeros_13, unsqueeze_82, convolution_13, mean_13, add_31, reflection_pad2d_11, mul_40, unsqueeze_81}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf102', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf101, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_12}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf104', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf103, i3 + 64 * i2 + 4096 * i1) + load(primals_30, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={convolution_14, reflection_pad2d_12, primals_29, primals_30}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf106, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={primals_30, primals_29, var_14, sqrt_14, add_33, view_55, convolution_14, reciprocal_14, reflection_pad2d_12}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={primals_30, squeeze_87, primals_29, var_14, sqrt_14, add_33, view_55, convolution_14, reciprocal_14, reflection_pad2d_12}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={primals_30, squeeze_87, squeeze_88, primals_29, var_14, sqrt_14, add_33, view_55, convolution_14, reciprocal_14, reflection_pad2d_12}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={primals_30, squeeze_87, primals_29, squeeze_88, var_14, sqrt_14, squeeze_89, add_33, view_55, convolution_14, reciprocal_14, reflection_pad2d_12}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf108', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf94, i3 + 64 * i2 + 4096 * i1) + load(buf104, i3 + 64 * i2 + 4096 * i1) - load(buf107, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf106, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reciprocal_1, mean_6, sub_1, reflection_pad2d_4, convolution_10, primals_6, sub_2, view_34, primals_5, reciprocal_2, unsqueeze_16, sqrt_10, reciprocal_6, view_10, var_10, view_27, view_7, unsqueeze_64, mean_4, view_8, view_6, convolution_6, primals_10, convolution_2, add_23, primals_9, sub_6, var_6, sqrt_2, mul_31, sqrt_6, unsqueeze_9, unsqueeze_40, relu_1, primals_14, add_3, add_34, mul_5, sub_14, unsqueeze_63, unsqueeze_11, primals_17, new_zeros_1, primals_13, mul_19, new_zeros_10, mul_44, add_25, add_24, primals_18, mul_4, mul_32, add_13, add_2, unsqueeze_65, primals_21, add_4, mean_12, new_zeros_6, view_42, add_14, mul_20, unsqueeze_41, var_12, add_15, new_zeros_2, convolution_12, unsqueeze_17, primals_30, add_35, mul_7, sqrt_12, unsqueeze_89, relu_2, unsqueeze_39, reflection_pad2d_10, view_41, add_5, mean_8, reciprocal_12, mul_8, primals_22, view_13, sub_8, unsqueeze_15, view_11, view_28, sqrt_8, primals_26, primals_25, sub_12, primals_29, convolution_8, sub_4, unsqueeze_76, unsqueeze_52, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_8, reciprocal_4, mul_37, convolution, reflection_pad2d_2, reflection_pad2d_6, new_zeros_14, var_2, var_4, var_8, convolution_4, primals_2, unsqueeze_2, mul_25, view, new_zeros_12, reflection_pad2d, add_30, primals_3, sqrt, mul_13, add_18, sub, unsqueeze_75, view_49, mean, new_zeros_8, add_19, add_29, var, mul_26, unsqueeze_53, unsqueeze_28, add_20, var_1, mean_1, add_8, view_48, mul_1, unsqueeze_51, mean_14, add, new_zeros_4, add_9, primals_1, mul_14, unsqueeze_29, unsqueeze_88, mean_10, view_35, add_10, view_56, new_zeros, unsqueeze_27, relu, sqrt_14, reciprocal, mul_2, unsqueeze_5, convolution_14, sub_10, reciprocal_14, unsqueeze_4, unsqueeze_3, full, add_1, var_14, unsqueeze_1, view_21, unsqueeze, mean_2, view_55, reciprocal_10, view_20, reflection_pad2d_12, unsqueeze_10, view_5, view_2, reflection_pad2d_8, unsqueeze_87, view_3, view_1, primals_4, add_33, mul_43, convolution_1, sqrt_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf109', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf108, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_13}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf111', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf110, i3 + 64 * i2 + 4096 * i1) + load(primals_32, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reflection_pad2d_13, convolution_15, primals_31, primals_32}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf113, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={var_15, convolution_15, sqrt_15, reflection_pad2d_13, primals_31, view_57, primals_32, reciprocal_15, add_36}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={var_15, convolution_15, sqrt_15, reflection_pad2d_13, squeeze_93, primals_31, view_57, primals_32, reciprocal_15, add_36}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={var_15, convolution_15, squeeze_94, sqrt_15, reflection_pad2d_13, squeeze_93, primals_31, view_57, primals_32, reciprocal_15, add_36}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={var_15, squeeze_95, convolution_15, squeeze_94, sqrt_15, reflection_pad2d_13, squeeze_93, primals_31, view_57, primals_32, reciprocal_15, add_36}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf115', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf111, i3 + 64 * i2 + 4096 * i1) - load(buf114, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf113, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={var_15, full, mul_46, unsqueeze_94, sqrt_15, unsqueeze_1, unsqueeze_2, unsqueeze, sub_15, add_37, mul_47, convolution_15, relu_9, reflection_pad2d_13, primals_31, view_57, primals_32, new_zeros_15, unsqueeze_93, view_58, reciprocal_15, unsqueeze_95, mean_15, add_36}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf116', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf115, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_14}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf118', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf117, i3 + 64 * i2 + 4096 * i1) + load(primals_34, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={convolution_16, primals_34, primals_33, reflection_pad2d_14}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf120, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={reciprocal_16, view_62, primals_34, primals_33, var_16, sqrt_16, add_38, reflection_pad2d_14, convolution_16}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={squeeze_99, reciprocal_16, view_62, primals_34, var_16, primals_33, sqrt_16, add_38, reflection_pad2d_14, convolution_16}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={squeeze_99, reciprocal_16, view_62, primals_34, var_16, primals_33, sqrt_16, add_38, squeeze_100, reflection_pad2d_14, convolution_16}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={squeeze_99, reciprocal_16, view_62, primals_34, var_16, primals_33, sqrt_16, add_38, squeeze_100, squeeze_101, reflection_pad2d_14, convolution_16}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf122', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf108, i3 + 64 * i2 + 4096 * i1) + load(buf118, i3 + 64 * i2 + 4096 * i1) - load(buf121, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf120, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reciprocal_1, mean_6, reciprocal_16, sub_1, view_62, reflection_pad2d_4, convolution_10, sub_16, primals_6, sub_2, view_34, sqrt_16, primals_5, reciprocal_2, reflection_pad2d_14, convolution_16, sqrt_10, unsqueeze_16, reciprocal_6, var_10, view_10, view_27, view_7, unsqueeze_64, mean_4, view_8, view_6, convolution_6, var_16, primals_10, convolution_2, add_23, mean_16, primals_9, sub_6, var_6, sqrt_2, mul_31, sqrt_6, unsqueeze_9, unsqueeze_40, relu_1, unsqueeze_100, primals_14, add_3, add_34, mul_5, sub_14, unsqueeze_63, unsqueeze_11, primals_17, new_zeros_1, primals_13, mul_19, new_zeros_10, mul_44, mul_49, add_25, add_38, add_24, primals_18, mul_4, mul_32, add_13, unsqueeze_65, add_2, primals_21, add_4, mean_12, new_zeros_6, view_42, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, var_12, add_15, unsqueeze_101, new_zeros_2, convolution_12, unsqueeze_17, primals_30, add_35, unsqueeze_99, mul_7, sqrt_12, unsqueeze_89, relu_2, unsqueeze_39, reflection_pad2d_10, view_63, view_41, add_5, mean_8, reciprocal_12, mul_8, primals_22, view_13, sub_8, unsqueeze_15, view_11, view_28, sqrt_8, primals_26, primals_25, sub_12, primals_29, convolution_8, primals_34, sub_4, unsqueeze_76, unsqueeze_52, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_8, reciprocal_4, mul_37, convolution, reflection_pad2d_2, reflection_pad2d_6, new_zeros_14, var_2, var_4, var_8, convolution_4, primals_2, unsqueeze_2, mul_25, view, new_zeros_12, reflection_pad2d, add_30, primals_3, sqrt, mul_13, add_18, sub, unsqueeze_75, view_49, mean, new_zeros_8, add_19, add_29, var, mul_26, unsqueeze_53, unsqueeze_28, add_20, var_1, mean_1, add_8, view_48, mul_1, unsqueeze_51, mean_14, add, new_zeros_4, add_9, primals_1, mul_14, unsqueeze_29, unsqueeze_88, mean_10, view_35, add_10, view_56, new_zeros, unsqueeze_27, relu, sqrt_14, reciprocal, mul_2, unsqueeze_5, convolution_14, sub_10, reciprocal_14, unsqueeze_4, unsqueeze_3, full, add_1, var_14, unsqueeze_1, view_21, unsqueeze, mean_2, view_55, reciprocal_10, view_20, reflection_pad2d_12, unsqueeze_10, view_5, view_2, reflection_pad2d_8, unsqueeze_87, view_3, view_1, primals_4, add_33, mul_43, convolution_1, sqrt_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf123', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf122, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_15}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf125', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf124, i3 + 64 * i2 + 4096 * i1) + load(primals_36, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reflection_pad2d_15, primals_35, convolution_17, primals_36}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf127, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={reciprocal_17, primals_35, var_17, reflection_pad2d_15, sqrt_17, primals_36, convolution_17, add_41, view_64}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={reciprocal_17, primals_35, var_17, reflection_pad2d_15, sqrt_17, primals_36, squeeze_105, convolution_17, add_41, view_64}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={reciprocal_17, squeeze_106, primals_35, var_17, reflection_pad2d_15, sqrt_17, primals_36, squeeze_105, convolution_17, add_41, view_64}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={reciprocal_17, squeeze_106, primals_35, var_17, reflection_pad2d_15, sqrt_17, squeeze_107, primals_36, squeeze_105, convolution_17, add_41, view_64}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf129', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf125, i3 + 64 * i2 + 4096 * i1) - load(buf128, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf127, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={full, mean_17, mul_52, add_42, unsqueeze_1, sub_17, unsqueeze_2, unsqueeze, view_65, unsqueeze_106, sqrt_17, primals_36, primals_35, add_41, reciprocal_17, unsqueeze_107, var_17, unsqueeze_105, mul_53, new_zeros_17, reflection_pad2d_15, convolution_17, relu_10, view_64}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf130', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf129, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_16}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf132', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf131, i3 + 64 * i2 + 4096 * i1) + load(primals_38, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={convolution_18, primals_38, primals_37, reflection_pad2d_16}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf134, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={convolution_18, sqrt_18, primals_37, view_69, var_18, primals_38, reflection_pad2d_16, reciprocal_18, add_43}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={convolution_18, sqrt_18, primals_37, view_69, var_18, squeeze_111, primals_38, reflection_pad2d_16, reciprocal_18, add_43}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={convolution_18, sqrt_18, primals_37, view_69, squeeze_112, var_18, squeeze_111, primals_38, reflection_pad2d_16, reciprocal_18, add_43}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={convolution_18, sqrt_18, primals_37, view_69, squeeze_112, var_18, squeeze_111, primals_38, reflection_pad2d_16, squeeze_113, reciprocal_18, add_43}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf136', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf122, i3 + 64 * i2 + 4096 * i1) + load(buf132, i3 + 64 * i2 + 4096 * i1) - load(buf135, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf134, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reciprocal_1, mean_6, reciprocal_16, sub_1, view_62, reflection_pad2d_4, convolution_10, sub_16, primals_6, sub_2, view_34, sqrt_16, primals_5, reciprocal_2, reflection_pad2d_14, convolution_16, unsqueeze_16, sqrt_10, reciprocal_6, var_10, view_10, view_27, view_7, unsqueeze_64, mean_4, view_69, view_8, view_6, convolution_6, var_16, primals_10, convolution_2, add_23, mean_16, primals_9, sub_6, var_6, sqrt_2, mul_31, sqrt_6, unsqueeze_9, unsqueeze_40, relu_1, unsqueeze_100, primals_14, add_3, add_34, mul_5, sub_14, unsqueeze_63, unsqueeze_11, primals_17, new_zeros_1, primals_13, mul_19, new_zeros_10, mul_44, mul_49, add_25, add_38, add_24, primals_18, mul_4, mul_32, add_13, add_2, unsqueeze_65, primals_21, add_4, mean_12, new_zeros_6, view_42, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, var_12, add_15, unsqueeze_101, new_zeros_2, convolution_12, mean_18, unsqueeze_17, primals_30, add_35, unsqueeze_99, mul_7, sqrt_12, unsqueeze_89, primals_37, relu_2, unsqueeze_39, reflection_pad2d_10, view_63, view_41, add_5, primals_38, mean_8, sub_18, reciprocal_12, mul_8, primals_22, view_13, sub_8, unsqueeze_15, view_11, view_28, sqrt_8, primals_26, primals_25, sub_12, primals_29, convolution_8, primals_34, reflection_pad2d_16, sub_4, unsqueeze_76, reciprocal_18, unsqueeze_52, add_28, sqrt_18, view_12, unsqueeze_77, mul_38, var_18, convolution_18, sqrt_4, reciprocal_8, reciprocal_4, new_zeros_18, mul_37, convolution, add_45, reflection_pad2d_2, reflection_pad2d_6, mul_56, new_zeros_14, var_2, var_4, var_8, convolution_4, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, view, unsqueeze_111, new_zeros_12, add_44, reflection_pad2d, add_30, primals_3, sqrt, mul_13, add_18, sub, unsqueeze_75, view_49, mean, new_zeros_8, add_19, add_29, var, mul_26, unsqueeze_112, unsqueeze_53, unsqueeze_28, add_20, var_1, mean_1, add_43, add_8, view_48, mul_55, mul_1, unsqueeze_51, mean_14, add, new_zeros_4, add_9, primals_1, mul_14, unsqueeze_29, unsqueeze_88, mean_10, view_35, add_10, view_56, new_zeros, unsqueeze_27, relu, sqrt_14, reciprocal, mul_2, unsqueeze_5, convolution_14, sub_10, reciprocal_14, unsqueeze_4, unsqueeze_3, full, add_1, var_14, unsqueeze_1, view_21, unsqueeze, mean_2, view_55, reciprocal_10, view_20, reflection_pad2d_12, unsqueeze_10, view_5, view_2, reflection_pad2d_8, unsqueeze_87, view_3, view_1, primals_4, add_33, mul_43, convolution_1, sqrt_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf137', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf136, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_17}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf139', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf138, i3 + 64 * i2 + 4096 * i1) + load(primals_40, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reflection_pad2d_17, primals_39, primals_40, convolution_19}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf141, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={reciprocal_19, add_46, primals_39, sqrt_19, convolution_19, view_71, var_19, primals_40, reflection_pad2d_17}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={reciprocal_19, squeeze_117, add_46, convolution_19, sqrt_19, primals_39, view_71, var_19, primals_40, reflection_pad2d_17}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={squeeze_118, reciprocal_19, squeeze_117, add_46, convolution_19, sqrt_19, primals_39, view_71, var_19, primals_40, reflection_pad2d_17}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={squeeze_119, squeeze_118, reciprocal_19, squeeze_117, add_46, convolution_19, sqrt_19, primals_39, view_71, var_19, primals_40, reflection_pad2d_17}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf143', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf139, i3 + 64 * i2 + 4096 * i1) - load(buf142, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf141, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={full, reciprocal_19, add_46, unsqueeze_1, unsqueeze_119, unsqueeze_2, primals_39, sqrt_19, add_47, relu_11, unsqueeze, view_72, unsqueeze_118, primals_40, var_19, mul_58, unsqueeze_117, mean_19, convolution_19, sub_19, new_zeros_19, view_71, mul_59, reflection_pad2d_17}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf144', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf143, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
    ranges=[1, 256, 66, 66],
    origins={reflection_pad2d_18}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf146', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf145, i3 + 64 * i2 + 4096 * i1) + load(primals_42, i1),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={primals_42, reflection_pad2d_18, convolution_20, primals_41}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf148, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 256, 1, 1],
            origins={reflection_pad2d_18, reciprocal_20, sqrt_20, view_76, convolution_20, var_20, primals_42, primals_41, add_48}
          )
        ),
        size=(1, 256, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={reflection_pad2d_18, reciprocal_20, sqrt_20, view_76, convolution_20, squeeze_123, var_20, primals_42, primals_41, add_48}
      ),
      size=(1, 256),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={reflection_pad2d_18, reciprocal_20, sqrt_20, view_76, convolution_20, squeeze_123, var_20, primals_42, primals_41, add_48, squeeze_124}
    ),
    size=(256,),
    reindex=lambda i0: [0, i0],
    origins={reflection_pad2d_18, reciprocal_20, sqrt_20, view_76, convolution_20, squeeze_123, var_20, primals_42, primals_41, add_48, squeeze_124, squeeze_125}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf150', layout=FixedLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf136, i3 + 64 * i2 + 4096 * i1) + load(buf146, i3 + 64 * i2 + 4096 * i1) - load(buf149, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf148, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
    ranges=torch.Size([1, 256, 64, 64]),
    origins={reciprocal_1, mean_6, unsqueeze_124, reciprocal_16, sub_1, view_62, reflection_pad2d_4, convolution_10, sub_16, primals_6, sub_2, view_34, sqrt_16, var_20, primals_5, reciprocal_2, reflection_pad2d_14, convolution_16, sqrt_10, unsqueeze_16, reciprocal_6, var_10, view_10, view_27, view_7, unsqueeze_64, mean_4, view_69, mul_61, view_8, view_6, convolution_20, convolution_6, var_16, primals_10, convolution_2, add_23, mean_16, primals_9, sub_6, add_48, var_6, sqrt_2, mul_31, sqrt_6, unsqueeze_9, unsqueeze_40, relu_1, unsqueeze_100, primals_14, add_3, unsqueeze_123, add_34, mul_5, sub_14, unsqueeze_63, unsqueeze_11, primals_17, new_zeros_1, primals_13, new_zeros_20, mul_19, add_50, new_zeros_10, mul_44, mul_49, add_25, add_49, add_38, add_24, mul_62, primals_18, mul_4, mul_32, add_13, unsqueeze_65, add_2, primals_21, view_77, add_4, mean_12, new_zeros_6, view_42, new_zeros_16, add_40, add_14, unsqueeze_125, mul_20, add_39, unsqueeze_41, mul_50, primals_33, var_12, add_15, unsqueeze_101, new_zeros_2, convolution_12, mean_18, unsqueeze_17, primals_30, add_35, unsqueeze_99, mul_7, sqrt_12, unsqueeze_89, primals_37, view_76, relu_2, unsqueeze_39, reflection_pad2d_10, view_63, view_41, add_5, primals_38, mean_8, sub_18, reciprocal_12, mul_8, primals_22, view_13, sub_8, unsqueeze_15, view_11, view_28, sqrt_8, primals_26, primals_25, sub_12, primals_29, convolution_8, primals_34, reflection_pad2d_16, sub_4, unsqueeze_76, reciprocal_18, primals_42, primals_41, unsqueeze_52, add_28, sqrt_18, view_12, unsqueeze_77, mul_38, var_18, convolution_18, sqrt_4, reciprocal_8, reciprocal_4, new_zeros_18, mul_37, convolution, add_45, reflection_pad2d_2, reflection_pad2d_6, mul_56, new_zeros_14, var_2, var_4, var_8, convolution_4, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, new_zeros_12, add_44, reflection_pad2d, add_30, primals_3, sqrt, mul_13, add_18, sub, unsqueeze_75, view_49, mean, new_zeros_8, add_19, add_29, var, mul_26, unsqueeze_112, unsqueeze_53, unsqueeze_28, add_20, var_1, mean_1, add_43, add_8, view_48, mul_55, mul_1, unsqueeze_51, mean_14, add, new_zeros_4, add_9, primals_1, mul_14, unsqueeze_29, unsqueeze_88, mean_10, view_35, add_10, view_56, new_zeros, sub_20, unsqueeze_27, relu, sqrt_14, reciprocal, mul_2, unsqueeze_5, convolution_14, sub_10, reciprocal_14, unsqueeze_4, unsqueeze_3, full, add_1, var_14, unsqueeze_1, view_21, unsqueeze, mean_2, view_55, reciprocal_10, view_20, reflection_pad2d_12, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, unsqueeze_87, view_3, view_1, primals_4, add_33, mul_43, convolution_1, sqrt_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf152', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 128, 128, 128]), stride=[2097152, 16384, 128, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf151, i3 + 128 * i2 + 16384 * i1) + load(primals_44, i1),
    ranges=torch.Size([1, 128, 128, 128]),
    origins={reciprocal_1, mean_6, unsqueeze_124, reciprocal_16, sub_1, view_62, reflection_pad2d_4, convolution_10, sub_16, primals_6, sub_2, view_34, sqrt_16, var_20, primals_5, reciprocal_2, reflection_pad2d_14, convolution_16, sqrt_10, unsqueeze_16, reciprocal_6, var_10, view_10, view_27, view_7, unsqueeze_64, mean_4, view_69, mul_61, view_8, view_6, convolution_20, convolution_6, var_16, primals_10, convolution_2, add_23, mean_16, primals_9, sub_6, add_48, var_6, sqrt_2, mul_31, sqrt_6, unsqueeze_9, unsqueeze_40, relu_1, unsqueeze_100, primals_14, add_3, unsqueeze_123, add_34, mul_5, sub_14, unsqueeze_63, unsqueeze_11, primals_17, new_zeros_1, primals_13, new_zeros_20, mul_19, add_50, new_zeros_10, mul_44, mul_49, add_25, add_49, add_38, add_24, mul_62, primals_18, mul_4, mul_32, add_2, unsqueeze_65, add_13, primals_21, view_77, add_4, mean_12, new_zeros_6, view_42, new_zeros_16, add_40, add_14, unsqueeze_125, mul_20, add_39, unsqueeze_41, mul_50, primals_33, var_12, add_15, unsqueeze_101, new_zeros_2, convolution_12, mean_18, unsqueeze_17, primals_30, add_35, unsqueeze_99, mul_7, sqrt_12, unsqueeze_89, primals_37, view_76, relu_2, unsqueeze_39, reflection_pad2d_10, view_63, view_41, add_5, primals_38, mean_8, sub_18, reciprocal_12, mul_8, primals_22, view_13, sub_8, unsqueeze_15, view_11, view_28, sqrt_8, primals_26, primals_25, sub_12, primals_29, convolution_8, primals_34, reflection_pad2d_16, sub_4, unsqueeze_76, reciprocal_18, primals_42, primals_41, unsqueeze_52, add_28, sqrt_18, view_12, primals_43, unsqueeze_77, mul_38, var_18, convolution_18, sqrt_4, reciprocal_8, primals_44, reciprocal_4, new_zeros_18, mul_37, convolution, add_45, reflection_pad2d_2, reflection_pad2d_6, mul_56, new_zeros_14, var_2, var_4, var_8, convolution_4, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, new_zeros_12, add_44, reflection_pad2d, add_30, primals_3, sqrt, mul_13, add_18, sub, unsqueeze_75, view_49, mean, new_zeros_8, add_19, add_29, var, mul_26, unsqueeze_112, unsqueeze_53, unsqueeze_28, add_20, var_1, mean_1, add_43, add_8, view_48, mul_55, mul_1, unsqueeze_51, mean_14, add, new_zeros_4, add_9, primals_1, mul_14, unsqueeze_29, unsqueeze_88, mean_10, view_35, add_10, view_56, new_zeros, sqrt_1, sub_20, unsqueeze_27, relu, sqrt_14, reciprocal, mul_2, unsqueeze_5, convolution_14, sub_10, reciprocal_14, unsqueeze_4, unsqueeze_3, full, add_1, var_14, unsqueeze_1, view_21, unsqueeze, mean_2, view_55, reciprocal_10, view_20, reflection_pad2d_12, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, unsqueeze_87, view_3, view_1, primals_4, add_33, mul_43, convolution_1, convolution_21}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf156, i1) / index_expr(16384, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 128, 1, 1],
            origins={mean_6, reciprocal_16, view_62, reflection_pad2d_4, sub_16, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, reciprocal_8, primals_44, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, convolution_21, sqrt_1, reciprocal_21, reciprocal_1, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, var_21, unsqueeze_11, new_zeros_1, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, sqrt_21, unsqueeze_65, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, add_51, var_12, new_zeros_2, convolution_12, unsqueeze_17, add_35, mul_7, sqrt_12, relu_2, reflection_pad2d_10, add_5, reciprocal_12, mul_8, view_13, unsqueeze_15, view_11, sub_12, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, reflection_pad2d_2, new_zeros_14, var_4, convolution_4, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
          )
        ),
        size=(1, 128, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, reciprocal_8, primals_44, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, sqrt_1, convolution_21, reciprocal_21, reciprocal_1, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, var_21, unsqueeze_11, new_zeros_1, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, sqrt_21, unsqueeze_65, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, squeeze_129, add_51, var_12, new_zeros_2, convolution_12, unsqueeze_17, add_35, mul_7, sqrt_12, relu_2, reflection_pad2d_10, add_5, reciprocal_12, mul_8, view_13, unsqueeze_15, view_11, sub_12, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, reflection_pad2d_2, new_zeros_14, var_4, convolution_4, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
      ),
      size=(1, 128),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, reciprocal_8, primals_44, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, convolution_21, sqrt_1, reciprocal_21, reciprocal_1, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, var_21, unsqueeze_11, new_zeros_1, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, sqrt_21, unsqueeze_65, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, squeeze_129, add_51, var_12, new_zeros_2, convolution_12, unsqueeze_17, add_35, mul_7, sqrt_12, squeeze_130, relu_2, reflection_pad2d_10, add_5, reciprocal_12, mul_8, view_13, unsqueeze_15, view_11, sub_12, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, reflection_pad2d_2, new_zeros_14, var_4, convolution_4, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
    ),
    size=(128,),
    reindex=lambda i0: [0, i0],
    origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, reciprocal_8, primals_44, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, sqrt_1, convolution_21, reciprocal_21, reciprocal_1, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, var_21, unsqueeze_11, new_zeros_1, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, sqrt_21, unsqueeze_65, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, squeeze_131, squeeze_129, add_51, var_12, new_zeros_2, convolution_12, unsqueeze_17, add_35, mul_7, sqrt_12, squeeze_130, relu_2, reflection_pad2d_10, add_5, reciprocal_12, mul_8, view_13, unsqueeze_15, view_11, sub_12, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, reflection_pad2d_2, new_zeros_14, var_4, convolution_4, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf159', layout=FixedLayout('cuda', torch.float32, size=torch.Size([1, 128, 128, 128]), stride=[2097152, 16384, 128, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf152, i3 + 128 * i2 + 16384 * i1) - load(buf158, i1) / index_expr(16384, torch.float32) * reciprocal(sqrt(load(buf156, i1) / index_expr(16384, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
    ranges=torch.Size([1, 128, 128, 128]),
    origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, reciprocal_8, primals_44, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, convolution_21, sqrt_1, reciprocal_1, reciprocal_21, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, sub_21, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, var_21, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, unsqueeze_65, sqrt_21, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, add_51, var_12, mul_64, new_zeros_2, convolution_12, unsqueeze_17, add_35, mul_7, sqrt_12, unsqueeze_130, relu_2, reflection_pad2d_10, unsqueeze_131, mul_65, add_5, reciprocal_12, mul_8, view_13, unsqueeze_15, view_11, new_zeros_21, relu_12, unsqueeze_129, view_79, sub_12, add_52, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, reflection_pad2d_2, new_zeros_14, var_4, convolution_4, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf161', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 64, 256, 256]), stride=[4194304, 65536, 256, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf160, i3 + 256 * i2 + 65536 * i1) + load(primals_46, i1),
    ranges=torch.Size([1, 64, 256, 256]),
    origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, primals_45, reciprocal_8, primals_44, primals_46, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, sqrt_1, convolution_21, reciprocal_1, reciprocal_21, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, sub_21, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, var_21, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, unsqueeze_65, sqrt_21, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, add_51, var_12, mul_64, new_zeros_2, convolution_12, unsqueeze_17, add_35, mul_7, sqrt_12, unsqueeze_130, relu_2, reflection_pad2d_10, unsqueeze_131, mul_65, add_5, view_81, convolution_22, reciprocal_12, mul_8, view_13, unsqueeze_15, view_11, new_zeros_21, relu_12, unsqueeze_129, view_79, sub_12, add_52, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, reflection_pad2d_2, new_zeros_14, var_4, convolution_4, view_80, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
  ))
)), TensorBox(
  View(
    View(
      View(
        StorageBox(
          Pointwise(
            'cuda',
            torch.float32,
            reciprocal(sqrt(load(buf165, i1) / index_expr(65536, torch.float32) + constant(1e-05, torch.float32))),
            ranges=[1, 64, 1, 1],
            origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, primals_45, reciprocal_8, primals_44, primals_46, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, convolution_21, sqrt_1, reciprocal_1, reciprocal_21, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, sub_21, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, var_21, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, unsqueeze_65, sqrt_21, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, add_51, var_12, mul_64, new_zeros_2, convolution_12, unsqueeze_17, add_35, mul_7, sqrt_12, unsqueeze_130, relu_2, reflection_pad2d_10, unsqueeze_131, mul_65, add_5, view_81, convolution_22, reciprocal_12, reciprocal_22, mul_8, view_13, var_22, sqrt_22, view_83, unsqueeze_15, view_11, new_zeros_21, relu_12, unsqueeze_129, view_79, sub_12, add_52, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, add_53, mul_37, reflection_pad2d_2, new_zeros_14, var_4, convolution_4, view_80, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
          )
        ),
        size=(1, 64, 1),
        reindex=lambda i0, i1, i2: [0, i1, 0, 0],
        origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, primals_45, reciprocal_8, primals_44, primals_46, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, sqrt_1, convolution_21, reciprocal_1, reciprocal_21, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, sub_21, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, var_21, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, unsqueeze_65, sqrt_21, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, add_51, var_12, mul_64, new_zeros_2, convolution_12, unsqueeze_17, add_35, mul_7, sqrt_12, unsqueeze_130, relu_2, reflection_pad2d_10, unsqueeze_131, mul_65, add_5, view_81, convolution_22, reciprocal_12, reciprocal_22, mul_8, view_13, var_22, sqrt_22, view_83, unsqueeze_15, view_11, new_zeros_21, relu_12, unsqueeze_129, view_79, sub_12, add_52, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, squeeze_135, add_53, mul_37, reflection_pad2d_2, new_zeros_14, var_4, convolution_4, view_80, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
      ),
      size=(1, 64),
      reindex=lambda i0, i1: [0, i1, 0],
      origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, primals_45, reciprocal_8, primals_44, primals_46, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, convolution_21, sqrt_1, reciprocal_1, reciprocal_21, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, sub_21, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, var_21, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, unsqueeze_65, sqrt_21, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, add_51, var_12, mul_64, new_zeros_2, convolution_12, unsqueeze_17, add_35, mul_7, sqrt_12, unsqueeze_130, relu_2, reflection_pad2d_10, unsqueeze_131, mul_65, add_5, view_81, convolution_22, reciprocal_12, reciprocal_22, mul_8, view_13, var_22, sqrt_22, view_83, unsqueeze_15, view_11, new_zeros_21, relu_12, unsqueeze_129, view_79, sub_12, add_52, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, squeeze_135, add_53, mul_37, squeeze_136, reflection_pad2d_2, new_zeros_14, var_4, convolution_4, view_80, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
    ),
    size=(64,),
    reindex=lambda i0: [0, i0],
    origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, primals_45, reciprocal_8, primals_44, primals_46, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, sqrt_1, convolution_21, reciprocal_1, reciprocal_21, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, sub_21, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, var_21, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, unsqueeze_65, sqrt_21, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, add_51, var_12, mul_64, new_zeros_2, convolution_12, unsqueeze_17, add_35, mul_7, sqrt_12, unsqueeze_130, relu_2, reflection_pad2d_10, unsqueeze_131, mul_65, add_5, view_81, convolution_22, reciprocal_12, reciprocal_22, mul_8, view_13, var_22, sqrt_22, view_83, unsqueeze_15, view_11, new_zeros_21, relu_12, unsqueeze_129, view_79, sub_12, add_52, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, squeeze_137, squeeze_135, add_53, mul_37, squeeze_136, reflection_pad2d_2, new_zeros_14, var_4, convolution_4, view_80, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf168', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 64, 256, 256]), stride=[4194304, 65536, 256, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf161, i3 + 256 * i2 + 65536 * i1) - load(buf167, i1) / index_expr(65536, torch.float32) * reciprocal(sqrt(load(buf165, i1) / index_expr(65536, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
    ranges=torch.Size([1, 64, 256, 256]),
    origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, primals_45, reciprocal_8, primals_44, primals_46, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, convolution_21, sqrt_1, reciprocal_1, reciprocal_21, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, sub_21, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, var_21, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, unsqueeze_65, sqrt_21, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, add_51, var_12, mul_64, new_zeros_2, convolution_12, unsqueeze_17, add_35, mul_7, sqrt_12, mean_22, unsqueeze_130, relu_2, reflection_pad2d_10, unsqueeze_131, mul_65, add_5, view_81, convolution_22, reciprocal_12, mul_8, reciprocal_22, view_13, var_22, sqrt_22, view_83, unsqueeze_15, sub_22, view_11, new_zeros_21, relu_12, unsqueeze_129, view_79, sub_12, add_52, sub_4, unsqueeze_76, view_84, add_28, view_12, unsqueeze_135, unsqueeze_77, mul_38, relu_13, add_54, sqrt_4, mul_68, unsqueeze_137, reciprocal_4, new_zeros_22, mul_37, add_53, reflection_pad2d_2, mul_67, new_zeros_14, var_4, convolution_4, unsqueeze_136, view_80, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf169', layout=FixedLayout('cuda', torch.float32, size=[1, 64, 262, 262], stride=[4393216, 68644, 262, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf168, constant(255, torch.int32) - abs(constant(255, torch.int32) - abs(index_expr(i3, torch.int32) - constant(3, torch.int32))) + 256 * constant(255, torch.int32) - abs(constant(255, torch.int32) - abs(index_expr(i2, torch.int32) - constant(3, torch.int32))) + 65536 * i1),
    ranges=[1, 64, 262, 262],
    origins={reflection_pad2d_19}
  ))
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    reciprocal(exp(load(buf170, i3 + 256 * i2 + 65536 * i1) + load(primals_48, i1) * constant(-2.0, torch.float32)) + constant(1.0, torch.float32)) * constant(2.0, torch.float32) - constant(1.0, torch.float32),
    ranges=torch.Size([1, 3, 256, 256]),
    origins={reciprocal_23, primals_47, mul_70, primals_48, add_55, exp, reflection_pad2d_19, mul_69, sub_23, convolution_23}
  )
)), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf167, i1) / index_expr(65536, torch.float32),
                  ranges=[1, 64, 1, 1],
                  origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, primals_45, reciprocal_8, primals_44, primals_46, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, sqrt_1, convolution_21, reciprocal_1, reciprocal_21, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, sub_21, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, var_21, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, unsqueeze_65, sqrt_21, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, add_51, var_12, mul_64, new_zeros_2, convolution_12, unsqueeze_17, add_35, mean_22, mul_7, sqrt_12, unsqueeze_130, relu_2, reflection_pad2d_10, unsqueeze_131, mul_65, add_5, view_81, convolution_22, reciprocal_12, mul_8, view_13, view_83, unsqueeze_15, view_11, new_zeros_21, relu_12, unsqueeze_129, view_79, sub_12, add_52, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, reflection_pad2d_2, new_zeros_14, var_4, convolution_4, view_80, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
                )
              ),
              size=(1, 64, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, primals_45, reciprocal_8, primals_44, primals_46, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, convolution_21, sqrt_1, reciprocal_1, reciprocal_21, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, sub_21, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, var_21, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, unsqueeze_65, sqrt_21, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, add_51, var_12, mul_64, new_zeros_2, convolution_12, unsqueeze_17, add_35, mean_22, mul_7, sqrt_12, unsqueeze_130, relu_2, reflection_pad2d_10, unsqueeze_131, mul_65, add_5, view_81, convolution_22, reciprocal_12, mul_8, view_13, view_83, unsqueeze_15, view_11, new_zeros_21, relu_12, unsqueeze_129, view_79, sub_12, add_52, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, reflection_pad2d_2, squeeze_132, new_zeros_14, var_4, convolution_4, view_80, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
            ),
            size=(1, 64),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, primals_45, reciprocal_8, primals_44, primals_46, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, sqrt_1, convolution_21, reciprocal_1, reciprocal_21, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, sub_21, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, var_21, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, unsqueeze_65, sqrt_21, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, add_51, var_12, mul_64, new_zeros_2, convolution_12, unsqueeze_17, add_35, mean_22, mul_7, sqrt_12, unsqueeze_130, relu_2, reflection_pad2d_10, unsqueeze_131, mul_65, add_5, view_81, convolution_22, reciprocal_12, mul_8, view_13, view_83, unsqueeze_15, view_11, new_zeros_21, relu_12, unsqueeze_129, view_79, sub_12, add_52, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, squeeze_133, reflection_pad2d_2, squeeze_132, new_zeros_14, var_4, convolution_4, view_80, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
          ),
          size=(64,),
          reindex=lambda i0: [0, i0],
          origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, primals_45, reciprocal_8, primals_44, primals_46, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, convolution_21, sqrt_1, reciprocal_1, reciprocal_21, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, sub_21, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, var_21, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, unsqueeze_65, sqrt_21, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, add_51, var_12, mul_64, new_zeros_2, convolution_12, unsqueeze_17, add_35, mean_22, mul_7, sqrt_12, unsqueeze_130, relu_2, reflection_pad2d_10, unsqueeze_131, mul_65, add_5, view_81, convolution_22, reciprocal_12, mul_8, view_13, view_83, unsqueeze_15, view_11, new_zeros_21, relu_12, unsqueeze_129, view_79, sub_12, add_52, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, squeeze_134, squeeze_133, reflection_pad2d_2, squeeze_132, new_zeros_14, var_4, convolution_4, view_80, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
        ),
        size=(1, 64),
        reindex=lambda i0, i1: [i1],
        origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, primals_45, reciprocal_8, primals_44, primals_46, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, sqrt_1, convolution_21, reciprocal_1, reciprocal_21, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, sub_21, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, var_21, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, unsqueeze_65, sqrt_21, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, add_51, var_12, mul_64, new_zeros_2, convolution_12, unsqueeze_17, add_35, mean_22, mul_7, sqrt_12, unsqueeze_130, relu_2, reflection_pad2d_10, unsqueeze_131, mul_65, add_5, view_81, convolution_22, reciprocal_12, mul_8, view_13, view_83, unsqueeze_15, view_11, new_zeros_21, relu_12, unsqueeze_129, view_79, sub_12, add_52, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, squeeze_134, squeeze_133, reflection_pad2d_2, squeeze_132, new_zeros_14, var_4, convolution_4, view_80, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_138, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
      ),
      size=(1, 64, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, primals_45, reciprocal_8, primals_44, primals_46, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, convolution_21, sqrt_1, reciprocal_1, reciprocal_21, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, sub_21, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, var_21, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, unsqueeze_65, sqrt_21, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, add_51, var_12, mul_64, new_zeros_2, convolution_12, unsqueeze_17, add_35, mean_22, mul_7, sqrt_12, unsqueeze_130, relu_2, reflection_pad2d_10, unsqueeze_131, mul_65, add_5, view_81, convolution_22, reciprocal_12, mul_8, view_13, view_83, unsqueeze_15, view_11, new_zeros_21, relu_12, unsqueeze_129, view_79, sub_12, add_52, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, squeeze_134, squeeze_133, reflection_pad2d_2, squeeze_132, new_zeros_14, var_4, convolution_4, view_80, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_139, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_138, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
    ),
    size=(1, 64, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, primals_45, reciprocal_8, primals_44, primals_46, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, sqrt_1, convolution_21, reciprocal_1, reciprocal_21, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, sub_21, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, var_21, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, add_2, unsqueeze_65, sqrt_21, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, add_51, var_12, mul_64, new_zeros_2, convolution_12, unsqueeze_17, add_35, mean_22, mul_7, sqrt_12, unsqueeze_130, relu_2, reflection_pad2d_10, unsqueeze_131, mul_65, add_5, view_81, convolution_22, reciprocal_12, mul_8, view_13, view_83, unsqueeze_15, view_11, new_zeros_21, relu_12, unsqueeze_129, view_79, sub_12, add_52, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, squeeze_134, squeeze_133, reflection_pad2d_2, squeeze_132, new_zeros_14, var_4, convolution_4, view_80, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_139, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_138, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, unsqueeze_140, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf158, i1) / index_expr(16384, torch.float32),
                  ranges=[1, 128, 1, 1],
                  origins={reciprocal_1, mean_6, unsqueeze_124, reciprocal_16, sub_1, view_62, reflection_pad2d_4, convolution_10, sub_16, primals_6, sub_2, view_34, sqrt_16, var_20, primals_5, reciprocal_2, reflection_pad2d_14, convolution_16, sqrt_10, unsqueeze_16, reciprocal_6, var_10, view_10, view_27, view_7, unsqueeze_64, mean_4, view_69, mul_61, view_8, view_6, convolution_20, convolution_6, var_16, primals_10, convolution_2, add_23, mean_16, primals_9, sub_6, add_48, var_6, sqrt_2, mul_31, sqrt_6, unsqueeze_9, unsqueeze_40, relu_1, unsqueeze_100, primals_14, add_3, unsqueeze_123, add_34, mul_5, sub_14, unsqueeze_63, unsqueeze_11, primals_17, new_zeros_1, primals_13, mean_21, new_zeros_20, mul_19, add_50, new_zeros_10, mul_44, mul_49, add_25, add_49, add_38, add_24, mul_62, primals_18, mul_4, mul_32, add_2, unsqueeze_65, add_13, primals_21, view_77, view_78, add_4, mean_12, new_zeros_6, view_42, new_zeros_16, add_40, add_14, unsqueeze_125, mul_20, add_39, unsqueeze_41, mul_50, primals_33, var_12, add_15, unsqueeze_101, new_zeros_2, convolution_12, mean_18, unsqueeze_17, primals_30, add_35, unsqueeze_99, mul_7, sqrt_12, unsqueeze_89, primals_37, view_76, relu_2, unsqueeze_39, reflection_pad2d_10, view_63, view_41, add_5, primals_38, mean_8, sub_18, reciprocal_12, mul_8, primals_22, view_13, sub_8, unsqueeze_15, view_11, view_28, sqrt_8, primals_26, primals_25, sub_12, primals_29, convolution_8, primals_34, reflection_pad2d_16, sub_4, unsqueeze_76, reciprocal_18, primals_42, primals_41, unsqueeze_52, add_28, sqrt_18, view_12, primals_43, unsqueeze_77, mul_38, var_18, convolution_18, sqrt_4, reciprocal_8, primals_44, reciprocal_4, new_zeros_18, mul_37, convolution, add_45, reflection_pad2d_2, reflection_pad2d_6, mul_56, new_zeros_14, var_2, var_4, var_8, convolution_4, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, new_zeros_12, add_44, reflection_pad2d, add_30, primals_3, sqrt, mul_13, add_18, sub, unsqueeze_75, view_49, mean, new_zeros_8, add_19, add_29, var, mul_26, unsqueeze_112, unsqueeze_53, unsqueeze_28, add_20, var_1, mean_1, add_43, add_8, view_48, mul_55, mul_1, unsqueeze_51, mean_14, add, new_zeros_4, add_9, primals_1, mul_14, unsqueeze_29, unsqueeze_88, mean_10, view_35, add_10, view_56, new_zeros, sqrt_1, sub_20, unsqueeze_27, relu, sqrt_14, reciprocal, mul_2, unsqueeze_5, convolution_14, sub_10, reciprocal_14, unsqueeze_4, unsqueeze_3, full, add_1, var_14, unsqueeze_1, view_21, unsqueeze, mean_2, view_55, reciprocal_10, view_20, reflection_pad2d_12, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, unsqueeze_87, view_3, view_1, primals_4, add_33, mul_43, convolution_1, convolution_21}
                )
              ),
              size=(1, 128, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={reciprocal_1, mean_6, unsqueeze_124, reciprocal_16, sub_1, view_62, reflection_pad2d_4, convolution_10, sub_16, primals_6, sub_2, view_34, sqrt_16, var_20, primals_5, reciprocal_2, reflection_pad2d_14, convolution_16, sqrt_10, unsqueeze_16, reciprocal_6, var_10, view_10, view_27, view_7, unsqueeze_64, mean_4, view_69, mul_61, view_8, view_6, convolution_20, convolution_6, var_16, primals_10, convolution_2, add_23, mean_16, primals_9, sub_6, add_48, var_6, sqrt_2, mul_31, sqrt_6, unsqueeze_9, unsqueeze_40, relu_1, unsqueeze_100, primals_14, add_3, unsqueeze_123, add_34, mul_5, sub_14, unsqueeze_63, unsqueeze_11, primals_17, new_zeros_1, primals_13, mean_21, new_zeros_20, mul_19, add_50, new_zeros_10, mul_44, mul_49, add_25, add_49, add_38, add_24, mul_62, primals_18, mul_4, mul_32, unsqueeze_65, add_2, add_13, primals_21, view_77, view_78, add_4, mean_12, new_zeros_6, view_42, new_zeros_16, add_40, add_14, unsqueeze_125, mul_20, add_39, unsqueeze_41, mul_50, primals_33, var_12, add_15, squeeze_126, unsqueeze_101, new_zeros_2, convolution_12, mean_18, unsqueeze_17, primals_30, add_35, unsqueeze_99, mul_7, sqrt_12, unsqueeze_89, primals_37, view_76, relu_2, unsqueeze_39, reflection_pad2d_10, view_63, view_41, add_5, primals_38, mean_8, sub_18, reciprocal_12, mul_8, primals_22, view_13, sub_8, unsqueeze_15, view_11, view_28, sqrt_8, primals_26, primals_25, sub_12, primals_29, convolution_8, primals_34, reflection_pad2d_16, sub_4, unsqueeze_76, reciprocal_18, primals_42, primals_41, unsqueeze_52, add_28, sqrt_18, view_12, primals_43, unsqueeze_77, mul_38, var_18, convolution_18, sqrt_4, reciprocal_8, primals_44, reciprocal_4, new_zeros_18, mul_37, convolution, add_45, reflection_pad2d_2, reflection_pad2d_6, mul_56, new_zeros_14, var_2, var_4, var_8, convolution_4, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, new_zeros_12, add_44, reflection_pad2d, add_30, primals_3, sqrt, mul_13, add_18, sub, unsqueeze_75, view_49, mean, new_zeros_8, add_19, add_29, var, mul_26, unsqueeze_112, unsqueeze_53, unsqueeze_28, add_20, var_1, mean_1, add_43, add_8, view_48, mul_55, mul_1, unsqueeze_51, mean_14, add, new_zeros_4, add_9, primals_1, mul_14, unsqueeze_29, unsqueeze_88, mean_10, view_35, add_10, view_56, new_zeros, sqrt_1, sub_20, unsqueeze_27, relu, sqrt_14, reciprocal, mul_2, unsqueeze_5, convolution_14, sub_10, reciprocal_14, unsqueeze_4, unsqueeze_3, full, add_1, var_14, unsqueeze_1, view_21, unsqueeze, mean_2, view_55, reciprocal_10, view_20, reflection_pad2d_12, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, unsqueeze_87, view_3, view_1, primals_4, add_33, mul_43, convolution_1, convolution_21}
            ),
            size=(1, 128),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={reciprocal_1, mean_6, unsqueeze_124, reciprocal_16, sub_1, view_62, reflection_pad2d_4, convolution_10, sub_16, primals_6, sub_2, view_34, sqrt_16, var_20, primals_5, reciprocal_2, reflection_pad2d_14, convolution_16, sqrt_10, unsqueeze_16, reciprocal_6, var_10, view_10, view_27, view_7, unsqueeze_64, mean_4, view_69, mul_61, view_8, view_6, convolution_20, convolution_6, var_16, primals_10, convolution_2, add_23, mean_16, primals_9, sub_6, add_48, var_6, sqrt_2, mul_31, sqrt_6, unsqueeze_9, unsqueeze_40, relu_1, unsqueeze_100, primals_14, add_3, unsqueeze_123, add_34, mul_5, sub_14, unsqueeze_63, unsqueeze_11, primals_17, new_zeros_1, primals_13, mean_21, new_zeros_20, mul_19, add_50, new_zeros_10, mul_44, mul_49, add_25, add_49, add_38, add_24, mul_62, primals_18, mul_4, mul_32, add_2, unsqueeze_65, add_13, primals_21, view_77, view_78, add_4, mean_12, new_zeros_6, view_42, new_zeros_16, add_40, add_14, unsqueeze_125, mul_20, add_39, unsqueeze_41, mul_50, primals_33, var_12, add_15, squeeze_126, unsqueeze_101, new_zeros_2, convolution_12, mean_18, unsqueeze_17, primals_30, add_35, unsqueeze_99, mul_7, sqrt_12, unsqueeze_89, primals_37, squeeze_127, view_76, relu_2, unsqueeze_39, reflection_pad2d_10, view_63, view_41, add_5, primals_38, mean_8, sub_18, reciprocal_12, mul_8, primals_22, view_13, sub_8, unsqueeze_15, view_11, view_28, sqrt_8, primals_26, primals_25, sub_12, primals_29, convolution_8, primals_34, reflection_pad2d_16, sub_4, unsqueeze_76, reciprocal_18, primals_42, primals_41, unsqueeze_52, add_28, sqrt_18, view_12, primals_43, unsqueeze_77, mul_38, var_18, convolution_18, sqrt_4, reciprocal_8, primals_44, reciprocal_4, new_zeros_18, mul_37, convolution, add_45, reflection_pad2d_2, reflection_pad2d_6, mul_56, new_zeros_14, var_2, var_4, var_8, convolution_4, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, new_zeros_12, add_44, reflection_pad2d, add_30, primals_3, sqrt, mul_13, add_18, sub, unsqueeze_75, view_49, mean, new_zeros_8, add_19, add_29, var, mul_26, unsqueeze_112, unsqueeze_53, unsqueeze_28, add_20, var_1, mean_1, add_43, add_8, view_48, mul_55, mul_1, unsqueeze_51, mean_14, add, new_zeros_4, add_9, primals_1, mul_14, unsqueeze_29, unsqueeze_88, mean_10, view_35, add_10, view_56, new_zeros, sqrt_1, sub_20, unsqueeze_27, relu, sqrt_14, reciprocal, mul_2, unsqueeze_5, convolution_14, sub_10, reciprocal_14, unsqueeze_4, unsqueeze_3, full, add_1, var_14, unsqueeze_1, view_21, unsqueeze, mean_2, view_55, reciprocal_10, view_20, reflection_pad2d_12, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, unsqueeze_87, view_3, view_1, primals_4, add_33, mul_43, convolution_1, convolution_21}
          ),
          size=(128,),
          reindex=lambda i0: [0, i0],
          origins={mean_6, reciprocal_16, view_62, reflection_pad2d_4, sub_16, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, reciprocal_8, primals_44, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, convolution_21, sqrt_1, reciprocal_1, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, unsqueeze_65, add_2, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, squeeze_128, var_12, squeeze_126, new_zeros_2, convolution_12, unsqueeze_17, add_35, mul_7, sqrt_12, squeeze_127, relu_2, reflection_pad2d_10, add_5, reciprocal_12, mul_8, view_13, unsqueeze_15, view_11, sub_12, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, reflection_pad2d_2, new_zeros_14, var_4, convolution_4, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
        ),
        size=(1, 128),
        reindex=lambda i0, i1: [i1],
        origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, reciprocal_8, primals_44, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, sqrt_1, convolution_21, reciprocal_1, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, unsqueeze_65, add_2, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, squeeze_128, var_12, squeeze_126, new_zeros_2, convolution_12, unsqueeze_17, add_35, mul_7, sqrt_12, squeeze_127, relu_2, reflection_pad2d_10, add_5, reciprocal_12, mul_8, view_13, unsqueeze_15, view_11, sub_12, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, reflection_pad2d_2, new_zeros_14, var_4, convolution_4, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, unsqueeze_150, add_10, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
      ),
      size=(1, 128, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, reciprocal_8, primals_44, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, convolution_21, sqrt_1, reciprocal_1, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, unsqueeze_65, add_2, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, squeeze_128, var_12, squeeze_126, new_zeros_2, convolution_12, unsqueeze_17, add_35, mul_7, sqrt_12, squeeze_127, relu_2, reflection_pad2d_10, add_5, reciprocal_12, mul_8, view_13, unsqueeze_15, view_11, sub_12, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, reflection_pad2d_2, new_zeros_14, var_4, convolution_4, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, unsqueeze_150, add_10, unsqueeze_151, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
    ),
    size=(1, 128, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={mean_6, reciprocal_16, view_62, sub_16, reflection_pad2d_4, primals_6, view_34, sqrt_16, primals_5, reflection_pad2d_14, convolution_16, reciprocal_6, view_27, view_69, convolution_6, var_16, primals_10, mean_16, primals_9, sub_6, var_6, sqrt_6, unsqueeze_40, unsqueeze_100, primals_14, add_34, primals_17, primals_13, mul_19, mul_49, add_38, primals_18, add_13, primals_21, new_zeros_6, new_zeros_16, add_40, add_14, mul_20, add_39, unsqueeze_41, mul_50, primals_33, add_15, unsqueeze_101, mean_18, primals_30, unsqueeze_99, primals_37, unsqueeze_89, view_76, unsqueeze_39, view_63, view_41, primals_38, mean_8, sub_18, primals_22, sub_8, view_28, sqrt_8, primals_26, primals_25, primals_29, convolution_8, primals_34, reflection_pad2d_16, primals_42, reciprocal_18, primals_41, unsqueeze_52, sqrt_18, primals_43, var_18, convolution_18, reciprocal_8, primals_44, new_zeros_18, convolution, add_45, reflection_pad2d_6, mul_56, var_2, var_8, primals_2, view_70, unsqueeze_113, unsqueeze_2, mul_25, mean_20, view, unsqueeze_111, add_44, reflection_pad2d, primals_3, sqrt, add_18, sub, mean, new_zeros_8, add_19, var, mul_26, unsqueeze_112, unsqueeze_53, add_20, var_1, mean_1, add_43, view_48, mul_55, mul_1, unsqueeze_51, add, primals_1, mean_10, view_35, view_56, new_zeros, sub_20, relu, reciprocal, mul_2, unsqueeze_5, sub_10, unsqueeze_4, unsqueeze_3, full, add_1, unsqueeze_1, unsqueeze, mean_2, reciprocal_10, view_20, unsqueeze_10, view_5, view_2, reflection_pad2d_8, reflection_pad2d_18, reciprocal_20, sqrt_20, view_3, view_1, convolution_1, sqrt_1, convolution_21, reciprocal_1, unsqueeze_124, sub_1, convolution_10, sub_2, var_20, reciprocal_2, sqrt_10, unsqueeze_16, var_10, view_10, view_7, unsqueeze_64, mean_4, mul_61, view_8, view_6, convolution_20, convolution_2, add_23, add_48, sqrt_2, mul_31, unsqueeze_9, relu_1, add_3, unsqueeze_123, mul_5, sub_14, unsqueeze_63, unsqueeze_11, new_zeros_1, mean_21, new_zeros_20, add_50, new_zeros_10, mul_44, add_25, add_49, add_24, mul_62, mul_4, mul_32, unsqueeze_65, add_2, view_77, view_78, add_4, mean_12, view_42, unsqueeze_125, squeeze_128, var_12, squeeze_126, new_zeros_2, convolution_12, unsqueeze_17, add_35, mul_7, sqrt_12, squeeze_127, relu_2, reflection_pad2d_10, add_5, reciprocal_12, mul_8, view_13, unsqueeze_15, view_11, sub_12, sub_4, unsqueeze_76, add_28, view_12, unsqueeze_77, mul_38, sqrt_4, reciprocal_4, mul_37, reflection_pad2d_2, new_zeros_14, var_4, convolution_4, new_zeros_12, add_30, mul_13, unsqueeze_75, view_49, unsqueeze_152, add_29, unsqueeze_28, add_8, mean_14, new_zeros_4, add_9, mul_14, unsqueeze_29, unsqueeze_88, unsqueeze_150, add_10, unsqueeze_151, unsqueeze_27, sqrt_14, convolution_14, reciprocal_14, var_14, view_21, view_55, reflection_pad2d_12, unsqueeze_87, primals_4, add_33, mul_43}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf149, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={reflection_pad2d_18, convolution_20, primals_42, primals_41, view_76, mean_20}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={mean_20, squeeze_120, reflection_pad2d_18, primals_42, primals_41, view_76, convolution_20}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={reflection_pad2d_18, squeeze_120, view_76, convolution_20, mean_20, primals_42, squeeze_121, primals_41}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={reflection_pad2d_18, squeeze_120, view_76, convolution_20, mean_20, squeeze_122, primals_42, squeeze_121, primals_41}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={reflection_pad2d_18, squeeze_120, view_76, convolution_20, mean_20, squeeze_122, primals_42, squeeze_121, primals_41, unsqueeze_162}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={reflection_pad2d_18, squeeze_120, view_76, unsqueeze_163, convolution_20, mean_20, squeeze_122, primals_42, squeeze_121, primals_41, unsqueeze_162}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={reflection_pad2d_18, squeeze_120, view_76, unsqueeze_163, convolution_20, mean_20, squeeze_122, primals_42, squeeze_121, unsqueeze_164, primals_41, unsqueeze_162}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf142, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={primals_39, reflection_pad2d_17, view_71, primals_40, mean_19, convolution_19}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={squeeze_114, reflection_pad2d_17, convolution_19, view_71, primals_40, mean_19, primals_39}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={mean_19, convolution_19, primals_39, squeeze_114, squeeze_115, view_71, primals_40, reflection_pad2d_17}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={mean_19, primals_39, convolution_19, squeeze_114, squeeze_115, squeeze_116, view_71, primals_40, reflection_pad2d_17}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={mean_19, convolution_19, primals_39, squeeze_114, squeeze_115, unsqueeze_174, squeeze_116, view_71, primals_40, reflection_pad2d_17}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={mean_19, primals_39, convolution_19, unsqueeze_175, squeeze_114, squeeze_115, unsqueeze_174, squeeze_116, view_71, primals_40, reflection_pad2d_17}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={unsqueeze_176, mean_19, convolution_19, primals_39, unsqueeze_175, squeeze_114, squeeze_115, unsqueeze_174, squeeze_116, view_71, primals_40, reflection_pad2d_17}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf135, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={primals_38, primals_37, reflection_pad2d_16, view_69, convolution_18, mean_18}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={squeeze_108, primals_38, primals_37, reflection_pad2d_16, view_69, convolution_18, mean_18}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={squeeze_108, primals_37, view_69, primals_38, reflection_pad2d_16, convolution_18, mean_18, squeeze_109}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={squeeze_108, primals_37, view_69, primals_38, reflection_pad2d_16, convolution_18, squeeze_110, mean_18, squeeze_109}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={squeeze_108, primals_37, view_69, unsqueeze_186, primals_38, reflection_pad2d_16, convolution_18, squeeze_110, mean_18, squeeze_109}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={squeeze_108, unsqueeze_187, primals_37, view_69, unsqueeze_186, primals_38, reflection_pad2d_16, convolution_18, squeeze_110, mean_18, squeeze_109}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={squeeze_108, unsqueeze_187, primals_37, view_69, unsqueeze_186, primals_38, reflection_pad2d_16, convolution_18, squeeze_110, mean_18, squeeze_109, unsqueeze_188}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf128, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={primals_35, reflection_pad2d_15, mean_17, primals_36, convolution_17, view_64}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={primals_35, reflection_pad2d_15, mean_17, primals_36, convolution_17, squeeze_102, view_64}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={squeeze_103, mean_17, primals_35, squeeze_102, reflection_pad2d_15, primals_36, convolution_17, view_64}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={squeeze_103, mean_17, primals_35, view_64, squeeze_102, reflection_pad2d_15, primals_36, convolution_17, squeeze_104}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={squeeze_103, mean_17, primals_35, view_64, squeeze_102, reflection_pad2d_15, unsqueeze_198, primals_36, convolution_17, squeeze_104}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={squeeze_103, mean_17, primals_35, view_64, squeeze_102, unsqueeze_199, reflection_pad2d_15, unsqueeze_198, primals_36, convolution_17, squeeze_104}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={squeeze_103, unsqueeze_200, mean_17, primals_35, view_64, squeeze_102, unsqueeze_199, reflection_pad2d_15, unsqueeze_198, primals_36, convolution_17, squeeze_104}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf121, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={mean_16, reflection_pad2d_14, convolution_16, view_62, primals_34, primals_33}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={mean_16, view_62, reflection_pad2d_14, convolution_16, squeeze_96, primals_34, primals_33}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={squeeze_97, view_62, squeeze_96, primals_34, primals_33, mean_16, reflection_pad2d_14, convolution_16}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={squeeze_98, squeeze_97, view_62, primals_34, squeeze_96, primals_33, mean_16, reflection_pad2d_14, convolution_16}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={squeeze_98, squeeze_97, view_62, primals_34, squeeze_96, primals_33, mean_16, reflection_pad2d_14, unsqueeze_210, convolution_16}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={squeeze_98, squeeze_97, view_62, primals_34, squeeze_96, primals_33, mean_16, reflection_pad2d_14, unsqueeze_211, unsqueeze_210, convolution_16}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={squeeze_98, squeeze_97, view_62, primals_34, squeeze_96, primals_33, mean_16, reflection_pad2d_14, unsqueeze_211, unsqueeze_210, unsqueeze_212, convolution_16}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf114, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={convolution_15, primals_31, reflection_pad2d_13, mean_15, view_57, primals_32}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={convolution_15, primals_31, reflection_pad2d_13, mean_15, view_57, squeeze_90, primals_32}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={convolution_15, reflection_pad2d_13, view_57, primals_31, squeeze_91, primals_32, mean_15, squeeze_90}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={convolution_15, squeeze_92, reflection_pad2d_13, primals_31, view_57, squeeze_91, primals_32, mean_15, squeeze_90}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={convolution_15, mean_15, squeeze_92, reflection_pad2d_13, view_57, primals_31, squeeze_91, primals_32, unsqueeze_222, squeeze_90}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={convolution_15, mean_15, squeeze_92, reflection_pad2d_13, view_57, unsqueeze_223, primals_31, squeeze_91, primals_32, unsqueeze_222, squeeze_90}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={convolution_15, mean_15, squeeze_92, reflection_pad2d_13, view_57, unsqueeze_223, primals_31, squeeze_91, primals_32, unsqueeze_224, unsqueeze_222, squeeze_90}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf107, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={primals_30, primals_29, view_55, convolution_14, mean_14, reflection_pad2d_12}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={primals_30, primals_29, view_55, convolution_14, mean_14, reflection_pad2d_12, squeeze_84}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={primals_30, primals_29, mean_14, squeeze_85, view_55, convolution_14, reflection_pad2d_12, squeeze_84}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={primals_30, primals_29, mean_14, squeeze_85, view_55, convolution_14, squeeze_86, reflection_pad2d_12, squeeze_84}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={primals_30, primals_29, mean_14, squeeze_85, unsqueeze_234, view_55, convolution_14, squeeze_86, reflection_pad2d_12, squeeze_84}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={primals_30, primals_29, mean_14, squeeze_85, unsqueeze_235, unsqueeze_234, view_55, convolution_14, squeeze_86, reflection_pad2d_12, squeeze_84}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={primals_30, primals_29, mean_14, squeeze_85, squeeze_84, unsqueeze_235, unsqueeze_234, view_55, convolution_14, squeeze_86, reflection_pad2d_12, unsqueeze_236}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf100, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={primals_28, convolution_13, primals_27, mean_13, reflection_pad2d_11, view_50}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={primals_28, convolution_13, primals_27, mean_13, squeeze_78, reflection_pad2d_11, view_50}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={convolution_13, mean_13, squeeze_79, view_50, primals_28, primals_27, reflection_pad2d_11, squeeze_78}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={convolution_13, mean_13, squeeze_79, reflection_pad2d_11, view_50, primals_28, squeeze_80, primals_27, squeeze_78}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={convolution_13, mean_13, squeeze_79, view_50, primals_28, unsqueeze_246, squeeze_80, reflection_pad2d_11, primals_27, squeeze_78}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={convolution_13, unsqueeze_247, mean_13, squeeze_79, view_50, primals_28, unsqueeze_246, squeeze_80, reflection_pad2d_11, primals_27, squeeze_78}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={convolution_13, mean_13, unsqueeze_247, squeeze_79, unsqueeze_248, view_50, primals_28, unsqueeze_246, squeeze_80, reflection_pad2d_11, primals_27, squeeze_78}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf93, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={mean_12, reflection_pad2d_10, primals_25, view_48, convolution_12, primals_26}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={mean_12, squeeze_72, primals_25, reflection_pad2d_10, view_48, convolution_12, primals_26}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={mean_12, primals_25, squeeze_73, view_48, reflection_pad2d_10, squeeze_72, convolution_12, primals_26}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={mean_12, primals_25, view_48, squeeze_73, squeeze_74, reflection_pad2d_10, squeeze_72, convolution_12, primals_26}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={mean_12, primals_25, view_48, squeeze_73, unsqueeze_258, squeeze_74, reflection_pad2d_10, squeeze_72, convolution_12, primals_26}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={mean_12, primals_25, view_48, squeeze_73, unsqueeze_259, unsqueeze_258, squeeze_74, reflection_pad2d_10, squeeze_72, convolution_12, primals_26}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={mean_12, primals_25, view_48, squeeze_73, unsqueeze_259, unsqueeze_258, squeeze_74, reflection_pad2d_10, squeeze_72, unsqueeze_260, convolution_12, primals_26}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf86, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={convolution_11, view_43, primals_24, primals_23, mean_11, reflection_pad2d_9}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={convolution_11, view_43, squeeze_66, primals_24, mean_11, reflection_pad2d_9, primals_23}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={view_43, primals_24, mean_11, reflection_pad2d_9, convolution_11, squeeze_67, squeeze_66, primals_23}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={view_43, primals_24, mean_11, squeeze_68, reflection_pad2d_9, convolution_11, squeeze_67, squeeze_66, primals_23}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={unsqueeze_270, view_43, primals_24, mean_11, squeeze_68, reflection_pad2d_9, convolution_11, squeeze_67, squeeze_66, primals_23}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={unsqueeze_271, unsqueeze_270, view_43, primals_24, mean_11, squeeze_68, reflection_pad2d_9, convolution_11, squeeze_67, squeeze_66, primals_23}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={unsqueeze_271, unsqueeze_270, view_43, primals_24, mean_11, squeeze_68, reflection_pad2d_9, convolution_11, squeeze_67, squeeze_66, unsqueeze_272, primals_23}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf79, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={view_41, reflection_pad2d_8, primals_22, mean_10, primals_21, convolution_10}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={view_41, primals_22, reflection_pad2d_8, mean_10, convolution_10, primals_21, squeeze_60}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={reflection_pad2d_8, primals_22, convolution_10, view_41, mean_10, primals_21, squeeze_61, squeeze_60}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={primals_22, reflection_pad2d_8, convolution_10, view_41, squeeze_62, mean_10, primals_21, squeeze_61, squeeze_60}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={reflection_pad2d_8, primals_22, unsqueeze_282, convolution_10, view_41, squeeze_62, mean_10, primals_21, squeeze_61, squeeze_60}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={primals_22, reflection_pad2d_8, unsqueeze_282, convolution_10, unsqueeze_283, view_41, squeeze_62, mean_10, primals_21, squeeze_61, squeeze_60}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={reflection_pad2d_8, primals_22, unsqueeze_282, convolution_10, unsqueeze_283, view_41, squeeze_62, mean_10, primals_21, squeeze_61, unsqueeze_284, squeeze_60}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf72, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={convolution_9, mean_9, reflection_pad2d_7, primals_20, view_36, primals_19}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={convolution_9, mean_9, reflection_pad2d_7, squeeze_54, primals_20, view_36, primals_19}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={convolution_9, squeeze_54, squeeze_55, primals_19, mean_9, reflection_pad2d_7, primals_20, view_36}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={convolution_9, squeeze_54, squeeze_56, squeeze_55, primals_19, mean_9, reflection_pad2d_7, primals_20, view_36}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={convolution_9, squeeze_54, squeeze_56, squeeze_55, primals_19, mean_9, reflection_pad2d_7, primals_20, unsqueeze_294, view_36}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={convolution_9, unsqueeze_295, squeeze_54, squeeze_56, squeeze_55, primals_19, mean_9, reflection_pad2d_7, primals_20, unsqueeze_294, view_36}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={convolution_9, unsqueeze_295, squeeze_54, squeeze_56, squeeze_55, primals_19, mean_9, reflection_pad2d_7, primals_20, unsqueeze_296, unsqueeze_294, view_36}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf65, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={view_34, primals_18, mean_8, convolution_8, reflection_pad2d_6, primals_17}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={view_34, primals_18, mean_8, convolution_8, reflection_pad2d_6, squeeze_48, primals_17}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={convolution_8, squeeze_48, view_34, primals_18, mean_8, squeeze_49, reflection_pad2d_6, primals_17}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={squeeze_50, convolution_8, squeeze_48, view_34, primals_18, mean_8, squeeze_49, reflection_pad2d_6, primals_17}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={squeeze_50, convolution_8, squeeze_48, view_34, primals_18, mean_8, squeeze_49, unsqueeze_306, reflection_pad2d_6, primals_17}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={squeeze_50, convolution_8, squeeze_48, primals_17, view_34, primals_18, mean_8, unsqueeze_307, unsqueeze_306, reflection_pad2d_6, squeeze_49}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={unsqueeze_307, squeeze_50, convolution_8, unsqueeze_308, squeeze_48, view_34, primals_18, mean_8, squeeze_49, unsqueeze_306, reflection_pad2d_6, primals_17}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf58, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={mean_7, reflection_pad2d_5, view_29, primals_16, primals_15, convolution_7}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={mean_7, squeeze_42, reflection_pad2d_5, view_29, primals_16, primals_15, convolution_7}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={mean_7, reflection_pad2d_5, primals_16, primals_15, squeeze_43, squeeze_42, view_29, convolution_7}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={mean_7, reflection_pad2d_5, primals_16, primals_15, squeeze_44, squeeze_43, squeeze_42, view_29, convolution_7}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={mean_7, reflection_pad2d_5, primals_16, primals_15, unsqueeze_318, squeeze_44, squeeze_43, squeeze_42, view_29, convolution_7}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={unsqueeze_319, mean_7, reflection_pad2d_5, primals_16, primals_15, unsqueeze_318, squeeze_44, squeeze_43, squeeze_42, view_29, convolution_7}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={unsqueeze_319, mean_7, reflection_pad2d_5, primals_16, primals_15, unsqueeze_318, squeeze_44, squeeze_43, unsqueeze_320, squeeze_42, view_29, convolution_7}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf51, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={primals_14, mean_6, primals_13, reflection_pad2d_4, convolution_6, view_27}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={primals_14, primals_13, mean_6, squeeze_36, reflection_pad2d_4, convolution_6, view_27}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={primals_13, mean_6, squeeze_36, reflection_pad2d_4, convolution_6, primals_14, squeeze_37, view_27}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={squeeze_38, primals_13, mean_6, squeeze_36, reflection_pad2d_4, convolution_6, primals_14, squeeze_37, view_27}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={squeeze_38, primals_13, mean_6, squeeze_36, reflection_pad2d_4, convolution_6, unsqueeze_330, primals_14, squeeze_37, view_27}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={squeeze_38, primals_13, mean_6, unsqueeze_331, squeeze_36, reflection_pad2d_4, convolution_6, unsqueeze_330, primals_14, squeeze_37, view_27}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={squeeze_38, unsqueeze_332, primals_13, mean_6, unsqueeze_331, squeeze_36, reflection_pad2d_4, convolution_6, unsqueeze_330, primals_14, squeeze_37, view_27}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf44, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={primals_12, convolution_5, view_22, reflection_pad2d_3, primals_11, mean_5}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={primals_12, convolution_5, view_22, reflection_pad2d_3, primals_11, squeeze_30, mean_5}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={convolution_5, reflection_pad2d_3, squeeze_31, squeeze_30, primals_12, view_22, primals_11, mean_5}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={convolution_5, reflection_pad2d_3, squeeze_31, squeeze_30, primals_12, view_22, primals_11, mean_5, squeeze_32}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={convolution_5, reflection_pad2d_3, squeeze_31, squeeze_30, primals_12, unsqueeze_342, view_22, primals_11, mean_5, squeeze_32}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={convolution_5, reflection_pad2d_3, squeeze_31, squeeze_30, primals_12, unsqueeze_342, view_22, unsqueeze_343, primals_11, mean_5, squeeze_32}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={convolution_5, reflection_pad2d_3, squeeze_31, unsqueeze_344, squeeze_30, primals_12, unsqueeze_342, view_22, unsqueeze_343, primals_11, mean_5, squeeze_32}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf37, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={primals_10, mean_4, convolution_4, primals_9, view_20, reflection_pad2d_2}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={primals_10, mean_4, primals_9, convolution_4, reflection_pad2d_2, squeeze_24, view_20}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={mean_4, convolution_4, reflection_pad2d_2, primals_10, primals_9, squeeze_24, view_20, squeeze_25}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={mean_4, convolution_4, primals_10, squeeze_26, primals_9, reflection_pad2d_2, squeeze_24, view_20, squeeze_25}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={mean_4, convolution_4, primals_10, squeeze_26, primals_9, reflection_pad2d_2, squeeze_24, unsqueeze_354, view_20, squeeze_25}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={mean_4, convolution_4, unsqueeze_355, primals_10, squeeze_26, primals_9, reflection_pad2d_2, squeeze_24, unsqueeze_354, view_20, squeeze_25}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={mean_4, convolution_4, unsqueeze_355, primals_10, squeeze_26, primals_9, reflection_pad2d_2, squeeze_24, unsqueeze_354, unsqueeze_356, view_20, squeeze_25}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf30, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={primals_8, reflection_pad2d_1, view_15, primals_7, convolution_3, mean_3}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={view_15, primals_8, reflection_pad2d_1, squeeze_18, primals_7, convolution_3, mean_3}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={primals_8, squeeze_19, reflection_pad2d_1, squeeze_18, primals_7, convolution_3, mean_3, view_15}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={primals_8, squeeze_19, reflection_pad2d_1, squeeze_18, squeeze_20, primals_7, convolution_3, mean_3, view_15}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={primals_8, squeeze_19, reflection_pad2d_1, squeeze_18, squeeze_20, primals_7, convolution_3, mean_3, view_15, unsqueeze_366}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={primals_8, squeeze_19, reflection_pad2d_1, squeeze_18, squeeze_20, primals_7, convolution_3, mean_3, unsqueeze_367, view_15, unsqueeze_366}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={primals_8, squeeze_19, reflection_pad2d_1, squeeze_18, squeeze_20, primals_7, convolution_3, unsqueeze_368, mean_3, unsqueeze_367, view_15, unsqueeze_366}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf23, i1) / index_expr(4096, torch.float32),
                  ranges=[1, 256, 1, 1],
                  origins={reciprocal_1, mul_1, sub_1, add, primals_1, primals_6, primals_5, new_zeros, view_10, view_7, view_8, view_6, relu, convolution, reciprocal, convolution_2, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, unsqueeze_9, primals_2, add_1, unsqueeze_1, relu_1, unsqueeze_2, unsqueeze, view, mean_2, add_3, mul_5, reflection_pad2d, primals_3, sqrt, unsqueeze_11, sub, unsqueeze_10, new_zeros_1, mean, view_5, view_2, var, view_3, view_1, primals_4, mul_4, var_1, convolution_1, add_2, mean_1, sqrt_1}
                )
              ),
              size=(1, 256, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={reciprocal_1, mul_1, sub_1, add, primals_1, primals_6, squeeze_12, primals_5, new_zeros, view_10, view_7, view_8, view_6, relu, convolution, reciprocal, convolution_2, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, unsqueeze_9, primals_2, add_1, unsqueeze_1, relu_1, unsqueeze_2, unsqueeze, view, mean_2, add_3, mul_5, reflection_pad2d, primals_3, sqrt, unsqueeze_11, sub, unsqueeze_10, new_zeros_1, mean, view_5, view_2, var, view_3, view_1, primals_4, mul_4, var_1, convolution_1, add_2, mean_1, sqrt_1}
            ),
            size=(1, 256),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={reciprocal_1, mul_1, sub_1, add, primals_1, primals_6, squeeze_13, squeeze_12, primals_5, new_zeros, view_10, view_7, view_8, view_6, relu, convolution, reciprocal, convolution_2, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, unsqueeze_9, primals_2, add_1, unsqueeze_1, relu_1, unsqueeze_2, unsqueeze, view, mean_2, add_3, mul_5, reflection_pad2d, primals_3, sqrt, unsqueeze_11, sub, unsqueeze_10, new_zeros_1, mean, view_5, view_2, var, view_3, view_1, primals_4, mul_4, var_1, convolution_1, add_2, mean_1, sqrt_1}
          ),
          size=(256,),
          reindex=lambda i0: [0, i0],
          origins={reciprocal_1, mul_1, sub_1, add, squeeze_14, primals_1, primals_6, squeeze_13, squeeze_12, primals_5, new_zeros, view_10, view_7, view_8, view_6, relu, convolution, reciprocal, convolution_2, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, unsqueeze_9, primals_2, add_1, unsqueeze_1, relu_1, unsqueeze_2, unsqueeze, view, mean_2, add_3, mul_5, reflection_pad2d, primals_3, sqrt, unsqueeze_11, sub, unsqueeze_10, new_zeros_1, mean, view_5, view_2, var, view_3, view_1, primals_4, mul_4, var_1, convolution_1, add_2, mean_1, sqrt_1}
        ),
        size=(1, 256),
        reindex=lambda i0, i1: [i1],
        origins={reciprocal_1, mul_1, sub_1, add, squeeze_14, primals_1, primals_6, squeeze_13, squeeze_12, primals_5, new_zeros, view_10, view_7, view_8, view_6, relu, convolution, reciprocal, convolution_2, mul_2, unsqueeze_5, unsqueeze_378, unsqueeze_4, unsqueeze_3, full, unsqueeze_9, primals_2, add_1, unsqueeze_1, relu_1, unsqueeze_2, unsqueeze, view, mean_2, add_3, mul_5, reflection_pad2d, primals_3, sqrt, unsqueeze_11, sub, unsqueeze_10, new_zeros_1, mean, view_5, view_2, var, view_3, view_1, primals_4, mul_4, var_1, convolution_1, add_2, mean_1, sqrt_1}
      ),
      size=(1, 256, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={reciprocal_1, mul_1, sub_1, add, squeeze_14, unsqueeze_379, primals_1, primals_6, squeeze_13, squeeze_12, primals_5, new_zeros, view_10, view_7, view_8, view_6, relu, convolution, reciprocal, convolution_2, mul_2, unsqueeze_5, unsqueeze_378, unsqueeze_4, unsqueeze_3, full, unsqueeze_9, primals_2, add_1, unsqueeze_1, relu_1, unsqueeze_2, unsqueeze, view, mean_2, add_3, mul_5, reflection_pad2d, primals_3, sqrt, unsqueeze_11, sub, unsqueeze_10, new_zeros_1, mean, view_5, view_2, var, view_3, view_1, primals_4, mul_4, var_1, convolution_1, add_2, mean_1, sqrt_1}
    ),
    size=(1, 256, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={reciprocal_1, mul_1, sub_1, add, squeeze_14, unsqueeze_379, primals_1, primals_6, squeeze_13, squeeze_12, unsqueeze_380, primals_5, new_zeros, view_10, view_7, view_8, view_6, relu, convolution, reciprocal, convolution_2, mul_2, unsqueeze_5, unsqueeze_378, unsqueeze_4, unsqueeze_3, full, unsqueeze_9, primals_2, add_1, unsqueeze_1, relu_1, unsqueeze_2, unsqueeze, view, mean_2, add_3, mul_5, reflection_pad2d, primals_3, sqrt, unsqueeze_11, sub, unsqueeze_10, new_zeros_1, mean, view_5, view_2, var, view_3, view_1, primals_4, mul_4, var_1, convolution_1, add_2, mean_1, sqrt_1}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf17, i1) / index_expr(16384, torch.float32),
                  ranges=[1, 128, 1, 1],
                  origins={unsqueeze_3, full, mul_1, primals_2, add_1, add, unsqueeze_1, unsqueeze_2, unsqueeze, primals_1, view, reflection_pad2d, primals_3, sqrt, new_zeros, sub, mean, view_5, view_2, var, view_3, relu, convolution, reciprocal, view_1, primals_4, mul_2, convolution_1, unsqueeze_5, mean_1, unsqueeze_4}
                )
              ),
              size=(1, 128, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={mul_1, add, primals_1, new_zeros, relu, convolution, reciprocal, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, primals_2, add_1, unsqueeze_1, unsqueeze_2, unsqueeze, view, reflection_pad2d, primals_3, sqrt, sub, mean, view_5, view_2, var, view_3, squeeze_6, view_1, primals_4, convolution_1, mean_1}
            ),
            size=(1, 128),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={mul_1, add, primals_1, new_zeros, relu, convolution, reciprocal, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, primals_2, add_1, unsqueeze_1, unsqueeze_2, unsqueeze, view, reflection_pad2d, primals_3, sqrt, sub, mean, view_5, view_2, squeeze_7, var, view_3, squeeze_6, view_1, primals_4, convolution_1, mean_1}
          ),
          size=(128,),
          reindex=lambda i0: [0, i0],
          origins={mul_1, add, primals_1, new_zeros, relu, convolution, reciprocal, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, primals_2, add_1, unsqueeze_1, unsqueeze_2, unsqueeze, view, reflection_pad2d, primals_3, sqrt, sub, mean, view_5, view_2, squeeze_8, squeeze_7, var, view_3, squeeze_6, view_1, primals_4, convolution_1, mean_1}
        ),
        size=(1, 128),
        reindex=lambda i0, i1: [i1],
        origins={mul_1, add, primals_1, new_zeros, relu, convolution, reciprocal, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, primals_2, add_1, unsqueeze_1, unsqueeze_2, unsqueeze, view, reflection_pad2d, primals_3, sqrt, sub, mean, view_5, view_2, squeeze_8, squeeze_7, var, view_3, squeeze_6, view_1, primals_4, convolution_1, mean_1, unsqueeze_390}
      ),
      size=(1, 128, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={mul_1, add, primals_1, new_zeros, relu, convolution, reciprocal, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, primals_2, add_1, unsqueeze_1, unsqueeze_2, unsqueeze, view, reflection_pad2d, primals_3, sqrt, sub, mean, view_5, view_2, squeeze_8, squeeze_7, unsqueeze_391, var, view_3, squeeze_6, view_1, primals_4, convolution_1, mean_1, unsqueeze_390}
    ),
    size=(1, 128, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={mul_1, add, primals_1, new_zeros, relu, convolution, unsqueeze_392, reciprocal, mul_2, unsqueeze_5, unsqueeze_4, unsqueeze_3, full, primals_2, add_1, unsqueeze_1, unsqueeze_2, unsqueeze, view, reflection_pad2d, primals_3, sqrt, sub, mean, view_5, view_2, squeeze_8, squeeze_7, unsqueeze_391, var, view_3, squeeze_6, view_1, primals_4, convolution_1, mean_1, unsqueeze_390}
  )
), TensorBox(
  View(
    View(
      View(
        View(
          View(
            View(
              StorageBox(
                Pointwise(
                  'cuda',
                  torch.float32,
                  load(buf8, i1) / index_expr(65536, torch.float32),
                  ranges=[1, 64, 1, 1],
                  origins={mean, convolution, view, primals_1, reflection_pad2d, primals_2}
                )
              ),
              size=(1, 64, 1),
              reindex=lambda i0, i1, i2: [0, i1, 0, 0],
              origins={mean, convolution, view, primals_1, reflection_pad2d, primals_2, squeeze}
            ),
            size=(1, 64),
            reindex=lambda i0, i1: [0, i1, 0],
            origins={mean, primals_2, squeeze, squeeze_1, convolution, view, primals_1, reflection_pad2d}
          ),
          size=(64,),
          reindex=lambda i0: [0, i0],
          origins={mean, primals_2, squeeze, convolution, squeeze_1, view, squeeze_2, primals_1, reflection_pad2d}
        ),
        size=(1, 64),
        reindex=lambda i0, i1: [i1],
        origins={mean, unsqueeze_402, primals_2, squeeze, convolution, squeeze_1, view, squeeze_2, primals_1, reflection_pad2d}
      ),
      size=(1, 64, 1),
      reindex=lambda i0, i1, i2: [0, i1],
      origins={mean, unsqueeze_402, primals_2, squeeze, unsqueeze_403, convolution, squeeze_1, view, squeeze_2, primals_1, reflection_pad2d}
    ),
    size=(1, 64, 1, 1),
    reindex=lambda i0, i1, i2, i3: [0, i1, 0],
    origins={mean, unsqueeze_404, unsqueeze_402, primals_2, squeeze, unsqueeze_403, convolution, squeeze_1, view, squeeze_2, primals_1, reflection_pad2d}
  )
), 1, 64, 256, 256, 128, 128, 128, 256, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 128, 256, 256]

While executing return [sub_23, primals_1, primals_3, primals_5, primals_7, primals_9, primals_11, primals_13, primals_15, primals_17, primals_19, primals_21, primals_23, primals_25, primals_27, primals_29, primals_31, primals_33, primals_35, primals_37, primals_39, primals_41, primals_43, primals_45, primals_47, reflection_pad2d, convolution, squeeze_5, view_3, convolution_1, squeeze_11, view_8, convolution_2, squeeze_17, view_13, reflection_pad2d_1, convolution_3, squeeze_23, view_18, reflection_pad2d_2, convolution_4, squeeze_29, add_10, reflection_pad2d_3, convolution_5, squeeze_35, view_25, reflection_pad2d_4, convolution_6, squeeze_41, add_15, reflection_pad2d_5, convolution_7, squeeze_47, view_32, reflection_pad2d_6, convolution_8, squeeze_53, add_20, reflection_pad2d_7, convolution_9, squeeze_59, view_39, reflection_pad2d_8, convolution_10, squeeze_65, add_25, reflection_pad2d_9, convolution_11, squeeze_71, view_46, reflection_pad2d_10, convolution_12, squeeze_77, add_30, reflection_pad2d_11, convolution_13, squeeze_83, view_53, reflection_pad2d_12, convolution_14, squeeze_89, add_35, reflection_pad2d_13, convolution_15, squeeze_95, view_60, reflection_pad2d_14, convolution_16, squeeze_101, add_40, reflection_pad2d_15, convolution_17, squeeze_107, view_67, reflection_pad2d_16, convolution_18, squeeze_113, add_45, reflection_pad2d_17, convolution_19, squeeze_119, view_74, reflection_pad2d_18, convolution_20, squeeze_125, add_50, convolution_21, squeeze_131, view_81, convolution_22, squeeze_137, view_86, reflection_pad2d_19, sub_23, unsqueeze_140, unsqueeze_152, unsqueeze_164, unsqueeze_176, unsqueeze_188, unsqueeze_200, unsqueeze_212, unsqueeze_224, unsqueeze_236, unsqueeze_248, unsqueeze_260, unsqueeze_272, unsqueeze_284, unsqueeze_296, unsqueeze_308, unsqueeze_320, unsqueeze_332, unsqueeze_344, unsqueeze_356, unsqueeze_368, unsqueeze_380, unsqueeze_392, unsqueeze_404, sym_size, mul, sym_size_1, sym_size_2, mul_3, sym_size_3, sym_size_4, mul_6, sym_size_5, sym_size_6, sym_size_7, sym_size_8, sym_size_9, sym_size_10, sym_size_11, sym_size_12, sym_size_13, sym_size_14, sym_size_15, sym_size_16, sym_size_17, sym_size_18, sym_size_19, sym_size_20, sym_size_21, sym_size_22, sym_size_23, sym_size_24, sym_size_25, sym_size_26, sym_size_27, sym_size_28, sym_size_29, sym_size_30, sym_size_31, sym_size_32, sym_size_33, sym_size_34, sym_size_35, sym_size_36, sym_size_37, sym_size_38, sym_size_39, sym_size_40, sym_size_41, sym_size_42, sym_size_43, sym_size_44, sym_size_45, sym_size_46]
ERROR:common:compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 472, in call_user_compiler
    assert callable(compiled_fn), "compiler_fn did not return callable"
AssertionError: compiler_fn did not return callable

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1679, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 386, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 432, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 475, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

--dataroot /scratch/ezyang/work/b/torchbenchmark/torchbenchmark/data/.data/pytorch_CycleGAN_and_pix2pix_inputs/datasets/horse2zebra --name horse2zebra --model cycle_gan --display_id 0 --n_epochs 3 --n_epochs_decay 3 --gpu_ids 0 --checkpoints_dir /scratch/ezyang/work/b/torchbenchmark/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/.data/checkpoints
TorchDynamo optimized model failed to run because of following error
cuda train pytorch_CycleGAN_and_pix2pix       FAIL
Running torchbench.py pytorch_stargan...
[2022-11-19 14:57:41,746] torch._dynamo.optimizations.backends: [ERROR] aot_autograd error
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 53, in inner
    return fn(model, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 563, in aot_autograd
    return aot_module_simplified(subgraph.model, subgraph.example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1503, in aot_module_simplified
    aot_dispatcher_function = _create_aot_dispatcher_function(functional_call, joined_args, aot_config, fake_mode)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1191, in _create_aot_dispatcher_function
    aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 941, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args_with_views_handled)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 364, in fw_compiler
    return compile_fx_inner(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/debug_utils.py", line 473, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/debug.py", line 177, in inner
    return fn(*args, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 122, in compile_fx_inner
    graph.run(*example_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 140, in run
    return super().run(*args)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 321, in run_node
    result = super().run_node(n)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 282, in output
    assert all(
AssertionError: [TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf7, i0) / index_expr(16, torch.float32),
    ranges=[64],
    origins={repeat_3, view_2, clone, mean, primals_53}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf8, i0) / index_expr(16, torch.float32),
    ranges=[64],
    origins={view_3, clone_1, mean_1, repeat_4, primals_54}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf15, i0) / index_expr(16, torch.float32),
    ranges=[128],
    origins={mean_2, repeat_7, primals_56, view_9, clone_2}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf16, i0) / index_expr(16, torch.float32),
    ranges=[128],
    origins={clone_3, repeat_8, primals_57, mean_3, view_10}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf23, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={repeat_11, clone_4, mean_4, view_16, primals_59}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf24, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={primals_60, repeat_12, clone_5, view_17, mean_5}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf31, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={mean_6, primals_62, clone_6, view_23, repeat_15}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf32, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={mean_7, primals_63, clone_7, view_24, repeat_16}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf39, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={primals_65, view_30, clone_8, repeat_19, mean_8}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf40, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={repeat_20, mean_9, view_31, clone_9, primals_66}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf46, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={mean_10, primals_68, view_34, repeat_23, clone_10}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf47, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={repeat_24, view_35, primals_69, clone_11, mean_11}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf54, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={primals_71, mean_12, view_41, repeat_27, clone_12}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf55, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={view_42, clone_13, primals_72, mean_13, repeat_28}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf61, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={repeat_31, mean_14, clone_14, primals_74, view_45}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf62, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={repeat_32, primals_75, view_46, mean_15, clone_15}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf69, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={repeat_35, mean_16, primals_77, view_52, clone_16}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf70, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={view_53, clone_17, primals_78, mean_17, repeat_36}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf76, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={primals_80, mean_18, view_56, clone_18, repeat_39}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf77, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={mean_19, repeat_40, primals_81, view_57, clone_19}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf84, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={clone_20, mean_20, primals_83, view_63, repeat_43}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf85, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={view_64, repeat_44, primals_84, clone_21, mean_21}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf91, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={primals_86, clone_22, mean_22, view_67, repeat_47}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf92, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={repeat_48, primals_87, mean_23, view_68, clone_23}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf99, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={repeat_51, mean_24, view_74, clone_24, primals_89}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf100, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={clone_25, view_75, repeat_52, primals_90, mean_25}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf106, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={primals_92, mean_26, clone_26, view_78, repeat_55}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf107, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={clone_27, primals_93, mean_27, view_79, repeat_56}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf114, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={mean_28, repeat_59, clone_28, view_85, primals_95}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf115, i0) / index_expr(16, torch.float32),
    ranges=[256],
    origins={mean_29, view_86, primals_96, clone_29, repeat_60}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf121, i0) / index_expr(16, torch.float32),
    ranges=[128],
    origins={mean_30, primals_98, view_89, clone_30, repeat_63}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf122, i0) / index_expr(16, torch.float32),
    ranges=[128],
    origins={view_90, repeat_64, mean_31, clone_31, primals_99}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf129, i0) / index_expr(16, torch.float32),
    ranges=[64],
    origins={clone_32, primals_101, repeat_67, mean_32, view_96}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    load(buf130, i0) / index_expr(16, torch.float32),
    ranges=[64],
    origins={repeat_68, clone_33, mean_33, primals_102, view_97}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    reciprocal(exp(load(buf133, i3 + 128 * i2 + 16384 * i1 + 49152 * i0) * constant(-2.0, torch.float32)) + constant(1.0, torch.float32)) * constant(2.0, torch.float32) - constant(1.0, torch.float32),
    ranges=torch.Size([16, 3, 128, 128]),
    origins={mul_68, mul_69, reciprocal_17, primals_52, view_100, sub_17, add_40, convolution_17, exp}
  )
)), TensorBox(StorageBox(
  InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[64, 8, 7, 7], stride=[392, 49, 7, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_4', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 4, 4], stride=[1024, 16, 4, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 4, 4], stride=[2048, 16, 4, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_10', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_16', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_22', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_28', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_34', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_40', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_46', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 4, 4], stride=[2048, 16, 4, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 4, 4], stride=[1024, 16, 4, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_52', layout=FixedLayout('cuda', torch.float32, size=[3, 64, 7, 7], stride=[3136, 49, 7, 1]))
)), TensorBox(StorageBox(
  ConcatKernel(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[s0, 8, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), inputs=[ComputedBuffer(name='buf0', layout=AliasedLayout('cuda', torch.float32, size=[s0, s1, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_104, i3 + i1 * s2**2 + i2 * s2 + i0 * s1 * s2**2),
    ranges=[s0, s1, s2, s2],
    origins={cat}
  )), ComputedBuffer(name='buf1', layout=AliasedLayout('cuda', torch.float32, size=[s0, 8 - s1, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_105, i1 + i0 * 8 + -1 * s1),
    ranges=[s0, 8 - s1, s2, s2],
    origins={cat}
  ))])
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf4', layout=FixedLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_2, ModularIndexing(i0, 1, 64)),
    ranges=[64*s0],
    origins={primals_2, repeat_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf5', layout=FixedLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_53, ModularIndexing(i0, 1, 64)),
    ranges=[64*s0],
    origins={primals_53, clone, repeat_3}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf6', layout=FixedLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_54, ModularIndexing(i0, 1, 64)),
    ranges=[64*s0],
    origins={repeat_4, primals_54, clone_1}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf3,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 64, 128, 128]), stride=(1048576, 16384, 128, 1)),
        inputs=[ConcatKernel(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[s0, 8, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), inputs=[ComputedBuffer(name='buf0', layout=AliasedLayout('cuda', torch.float32, size=[s0, s1, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(primals_104, i3 + i1 * s2**2 + i2 * s2 + i0 * s1 * s2**2),
          ranges=[s0, s1, s2, s2],
          origins={cat}
        )), ComputedBuffer(name='buf1', layout=AliasedLayout('cuda', torch.float32, size=[s0, 8 - s1, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(primals_105, i1 + i0 * 8 + -1 * s1),
          ranges=[s0, 8 - s1, s2, s2],
          origins={cat}
        ))]), InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[64, 8, 7, 7], stride=[392, 49, 7, 1]))],
        constant_args=(None, (1, 1), (3, 3), (1, 1), False, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={convolution, primals_1, cat}
      )
    ),
    size=(1, 1024, 128, 128),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 64, 16), ModularIndexing(i1, 1, 64), i2, i3],
    origins={convolution, primals_1, cat, view_1}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf10', layout=FixedLayout('cuda', torch.float32, size=(16, 64, 128, 128), stride=[1048576, 16384, 128, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf9, i3 + 128 * i2 + 16384 * ModularIndexing(i1, 1, 64) + 1048576 * ModularIndexing(i1 + 64 * i0, 64, 16)),
    ranges=(16, 64, 128, 128),
    origins={view_6}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf12', layout=FixedLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_5, ModularIndexing(i0, 1, 128)),
    ranges=[128*s0],
    origins={repeat_5, primals_5}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf13', layout=FixedLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_56, ModularIndexing(i0, 1, 128)),
    ranges=[128*s0],
    origins={repeat_7, clone_2, primals_56}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf14', layout=FixedLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_57, ModularIndexing(i0, 1, 128)),
    ranges=[128*s0],
    origins={clone_3, repeat_8, primals_57}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf11,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 128, 64, 64]), stride=(524288, 4096, 64, 1)),
        inputs=[ComputedBuffer(name='buf10', layout=FixedLayout('cuda', torch.float32, size=(16, 64, 128, 128), stride=[1048576, 16384, 128, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf9, i3 + 128 * i2 + 16384 * ModularIndexing(i1, 1, 64) + 1048576 * ModularIndexing(i1 + 64 * i0, 64, 16)),
          ranges=(16, 64, 128, 128),
          origins={view_6}
        )), InputBuffer(name='primals_4', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 4, 4], stride=[1024, 16, 4, 1]))],
        constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={primals_4, convolution_1, view_6}
      )
    ),
    size=(1, 2048, 64, 64),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 128, 16), ModularIndexing(i1, 1, 128), i2, i3],
    origins={view_8, primals_4, convolution_1, view_6}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf18', layout=FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf17, i3 + 64 * i2 + 4096 * ModularIndexing(i1, 1, 128) + 524288 * ModularIndexing(i1 + 128 * i0, 128, 16)),
    ranges=(16, 128, 64, 64),
    origins={view_13}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf20', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_8, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_9, primals_8}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf21', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_59, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_11, primals_59, clone_4}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf22', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_60, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_12, clone_5, primals_60}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf19,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=(262144, 1024, 32, 1)),
        inputs=[ComputedBuffer(name='buf18', layout=FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf17, i3 + 64 * i2 + 4096 * ModularIndexing(i1, 1, 128) + 524288 * ModularIndexing(i1 + 128 * i0, 128, 16)),
          ranges=(16, 128, 64, 64),
          origins={view_13}
        )), InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 4, 4], stride=[2048, 16, 4, 1]))],
        constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={view_13, convolution_2, primals_7}
      )
    ),
    size=(1, 4096, 32, 32),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
    origins={view_13, convolution_2, primals_7, view_15}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf26', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf25, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)),
    ranges=(16, 256, 32, 32),
    origins={view_20}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf28', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_11, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_13, primals_11}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf29', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_62, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={primals_62, clone_6, repeat_15}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf30', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_63, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={clone_7, repeat_16, primals_63}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf27,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=(262144, 1024, 32, 1)),
        inputs=[ComputedBuffer(name='buf26', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf25, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)),
          ranges=(16, 256, 32, 32),
          origins={view_20}
        )), InputBuffer(name='primals_10', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
        constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={convolution_3, view_20, primals_10}
      )
    ),
    size=(1, 4096, 32, 32),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
    origins={convolution_3, view_20, view_22, primals_10}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf34', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf33, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)),
    ranges=(16, 256, 32, 32),
    origins={view_27}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf36', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_14, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_17, primals_14}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf37', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_65, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={primals_65, repeat_19, clone_8}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf38', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_66, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={primals_66, repeat_20, clone_9}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf35,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=(262144, 1024, 32, 1)),
        inputs=[ComputedBuffer(name='buf34', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf33, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)),
          ranges=(16, 256, 32, 32),
          origins={view_27}
        )), InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
        constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={convolution_4, primals_13, view_27}
      )
    ),
    size=(1, 4096, 32, 32),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
    origins={convolution_4, view_29, primals_13, view_27}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf41', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf26, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf35, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf37, i1 + 256 * i0) * reciprocal(sqrt(load(buf38, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf36, i1 + 256 * i0) + load(primals_15, ModularIndexing(i1, 1, 256)),
    ranges=(16, 256, 32, 32),
    origins={mul_17, clone_8, view_27, repeat_17, unsqueeze_35, primals_65, repeat_18, add_9, mul_19, unsqueeze_39, sub_4, unsqueeze_36, clone_9, view_20, unsqueeze_37, primals_66, unsqueeze_38, mul_18, reciprocal_4, unsqueeze_34, unsqueeze_32, primals_15, sqrt_4, view_29, unsqueeze_33, add_8, repeat_19, primals_13, add_10, view_32, repeat_20, primals_14, convolution_4}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf43', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_17, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_21, primals_17}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf44', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_68, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={primals_68, clone_10, repeat_23}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf45', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_69, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_24, primals_69, clone_11}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf42,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=(262144, 1024, 32, 1)),
        inputs=[ComputedBuffer(name='buf41', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf26, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf35, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf37, i1 + 256 * i0) * reciprocal(sqrt(load(buf38, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf36, i1 + 256 * i0) + load(primals_15, ModularIndexing(i1, 1, 256)),
          ranges=(16, 256, 32, 32),
          origins={mul_17, clone_8, view_27, repeat_17, unsqueeze_35, primals_65, repeat_18, add_9, mul_19, unsqueeze_39, sub_4, unsqueeze_36, clone_9, view_20, unsqueeze_37, primals_66, unsqueeze_38, mul_18, reciprocal_4, unsqueeze_34, unsqueeze_32, primals_15, sqrt_4, view_29, unsqueeze_33, add_8, repeat_19, primals_13, add_10, view_32, repeat_20, primals_14, convolution_4}
        )), InputBuffer(name='primals_16', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
        constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={mul_17, clone_8, view_27, repeat_17, convolution_5, unsqueeze_35, primals_65, repeat_18, add_9, mul_19, unsqueeze_39, sub_4, unsqueeze_36, clone_9, view_20, unsqueeze_37, primals_66, unsqueeze_38, mul_18, reciprocal_4, unsqueeze_34, unsqueeze_32, primals_15, sqrt_4, view_29, unsqueeze_33, primals_16, add_8, repeat_19, primals_13, add_10, view_32, repeat_20, primals_14, convolution_4}
      )
    ),
    size=(1, 4096, 32, 32),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
    origins={mul_17, clone_8, view_27, repeat_17, convolution_5, unsqueeze_35, primals_65, repeat_18, add_9, mul_19, unsqueeze_39, sub_4, unsqueeze_36, clone_9, view_20, unsqueeze_37, primals_66, unsqueeze_38, mul_18, reciprocal_4, unsqueeze_34, unsqueeze_32, primals_15, sqrt_4, view_29, view_33, unsqueeze_33, primals_16, add_8, repeat_19, primals_13, add_10, view_32, repeat_20, primals_14, convolution_4}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf49', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf48, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)),
    ranges=(16, 256, 32, 32),
    origins={view_38}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf51', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_20, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={primals_20, repeat_25}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf52', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_71, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_27, primals_71, clone_12}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf53', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_72, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={clone_13, primals_72, repeat_28}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf50,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=(262144, 1024, 32, 1)),
        inputs=[ComputedBuffer(name='buf49', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf48, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)),
          ranges=(16, 256, 32, 32),
          origins={view_38}
        )), InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
        constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={primals_19, view_38, convolution_6}
      )
    ),
    size=(1, 4096, 32, 32),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
    origins={view_38, primals_19, view_40, convolution_6}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf56', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf41, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf50, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf52, i1 + 256 * i0) * reciprocal(sqrt(load(buf53, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf51, i1 + 256 * i0) + load(primals_21, ModularIndexing(i1, 1, 256)),
    ranges=(16, 256, 32, 32),
    origins={mul_17, clone_8, view_27, unsqueeze_50, primals_20, repeat_26, clone_13, primals_65, mul_26, repeat_18, unsqueeze_51, sub_6, unsqueeze_52, clone_12, unsqueeze_53, unsqueeze_54, clone_9, unsqueeze_55, primals_66, mul_27, add_15, add_14, repeat_25, convolution_6, reciprocal_6, view_32, view_43, primals_19, primals_21, primals_14, repeat_17, unsqueeze_35, add_9, mul_19, unsqueeze_39, sub_4, unsqueeze_36, view_20, unsqueeze_37, unsqueeze_38, unsqueeze_49, mul_18, reciprocal_4, add_13, unsqueeze_34, sqrt_6, unsqueeze_32, view_40, primals_15, sqrt_4, view_29, unsqueeze_48, unsqueeze_33, add_8, mul_25, repeat_19, primals_13, view_38, add_10, primals_72, repeat_28, repeat_27, repeat_20, primals_71, convolution_4}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf58', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_23, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_29, primals_23}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf59', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_74, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_31, primals_74, clone_14}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf60', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_75, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_32, primals_75, clone_15}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf57,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=(262144, 1024, 32, 1)),
        inputs=[ComputedBuffer(name='buf56', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf41, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf50, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf52, i1 + 256 * i0) * reciprocal(sqrt(load(buf53, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf51, i1 + 256 * i0) + load(primals_21, ModularIndexing(i1, 1, 256)),
          ranges=(16, 256, 32, 32),
          origins={mul_17, clone_8, view_27, unsqueeze_50, primals_20, repeat_26, clone_13, primals_65, mul_26, repeat_18, unsqueeze_51, sub_6, unsqueeze_52, clone_12, unsqueeze_53, unsqueeze_54, clone_9, unsqueeze_55, primals_66, mul_27, add_15, add_14, repeat_25, convolution_6, reciprocal_6, view_32, view_43, primals_19, primals_21, primals_14, repeat_17, unsqueeze_35, add_9, mul_19, unsqueeze_39, sub_4, unsqueeze_36, view_20, unsqueeze_37, unsqueeze_38, unsqueeze_49, mul_18, reciprocal_4, add_13, unsqueeze_34, sqrt_6, unsqueeze_32, view_40, primals_15, sqrt_4, view_29, unsqueeze_48, unsqueeze_33, add_8, mul_25, repeat_19, primals_13, view_38, add_10, primals_72, repeat_28, repeat_27, repeat_20, primals_71, convolution_4}
        )), InputBuffer(name='primals_22', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
        constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={mul_17, unsqueeze_50, clone_8, view_27, primals_20, repeat_26, clone_13, primals_65, mul_26, repeat_18, unsqueeze_51, sub_6, unsqueeze_52, clone_12, unsqueeze_53, unsqueeze_54, clone_9, unsqueeze_55, primals_66, mul_27, add_15, add_14, repeat_25, convolution_7, convolution_6, reciprocal_6, view_32, view_43, primals_19, primals_21, primals_14, primals_22, repeat_17, unsqueeze_35, add_9, mul_19, unsqueeze_39, sub_4, unsqueeze_36, view_20, unsqueeze_37, unsqueeze_38, unsqueeze_49, mul_18, reciprocal_4, add_13, unsqueeze_34, sqrt_6, unsqueeze_32, view_40, primals_15, sqrt_4, view_29, unsqueeze_48, unsqueeze_33, add_8, mul_25, repeat_19, primals_13, view_38, add_10, primals_72, repeat_28, repeat_27, repeat_20, primals_71, convolution_4}
      )
    ),
    size=(1, 4096, 32, 32),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
    origins={mul_17, unsqueeze_50, view_27, clone_8, primals_20, repeat_26, clone_13, primals_65, mul_26, repeat_18, unsqueeze_51, sub_6, unsqueeze_52, clone_12, unsqueeze_53, unsqueeze_54, clone_9, unsqueeze_55, primals_66, mul_27, add_15, add_14, repeat_25, convolution_7, convolution_6, reciprocal_6, view_32, view_43, primals_19, primals_21, primals_14, view_44, primals_22, repeat_17, unsqueeze_35, add_9, mul_19, unsqueeze_39, sub_4, unsqueeze_36, view_20, unsqueeze_37, unsqueeze_38, unsqueeze_49, mul_18, reciprocal_4, add_13, unsqueeze_34, sqrt_6, unsqueeze_32, view_40, primals_15, sqrt_4, view_29, unsqueeze_48, unsqueeze_33, add_8, mul_25, repeat_19, primals_13, view_38, add_10, primals_72, repeat_28, repeat_27, repeat_20, primals_71, convolution_4}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf64', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf63, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)),
    ranges=(16, 256, 32, 32),
    origins={view_49}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf66', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_26, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={primals_26, repeat_33}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf67', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_77, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_35, primals_77, clone_16}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf68', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_78, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={clone_17, primals_78, repeat_36}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf65,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=(262144, 1024, 32, 1)),
        inputs=[ComputedBuffer(name='buf64', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf63, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)),
          ranges=(16, 256, 32, 32),
          origins={view_49}
        )), InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
        constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={primals_25, view_49, convolution_8}
      )
    ),
    size=(1, 4096, 32, 32),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
    origins={primals_25, view_49, view_51, convolution_8}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf71', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf56, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf65, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf67, i1 + 256 * i0) * reciprocal(sqrt(load(buf68, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf66, i1 + 256 * i0) + load(primals_27, ModularIndexing(i1, 1, 256)),
    ranges=(16, 256, 32, 32),
    origins={primals_78, mul_17, unsqueeze_50, clone_8, view_27, primals_20, repeat_26, clone_13, primals_65, mul_26, repeat_18, unsqueeze_51, primals_77, sub_6, view_54, unsqueeze_52, clone_12, unsqueeze_53, unsqueeze_54, clone_9, primals_27, unsqueeze_55, primals_66, mul_27, add_15, add_14, clone_17, repeat_25, convolution_6, reciprocal_6, unsqueeze_67, view_32, view_43, reciprocal_8, primals_19, convolution_8, primals_21, sub_8, primals_14, unsqueeze_68, unsqueeze_69, unsqueeze_70, unsqueeze_71, add_20, mul_34, mul_35, add_19, repeat_17, repeat_33, unsqueeze_35, add_9, unsqueeze_66, mul_19, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, view_20, unsqueeze_37, unsqueeze_38, unsqueeze_49, mul_18, reciprocal_4, add_13, add_18, unsqueeze_34, primals_26, sqrt_6, unsqueeze_32, view_40, view_51, sqrt_4, primals_15, view_29, unsqueeze_48, unsqueeze_65, unsqueeze_33, clone_16, repeat_35, add_8, mul_25, mul_33, repeat_19, primals_13, view_38, view_49, primals_72, add_10, repeat_36, repeat_28, repeat_27, primals_25, repeat_20, primals_71, convolution_4, repeat_34}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf73', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_29, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={primals_29, repeat_37}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf74', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_80, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={clone_18, primals_80, repeat_39}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf75', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_81, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_40, primals_81, clone_19}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf72,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=(262144, 1024, 32, 1)),
        inputs=[ComputedBuffer(name='buf71', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf56, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf65, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf67, i1 + 256 * i0) * reciprocal(sqrt(load(buf68, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf66, i1 + 256 * i0) + load(primals_27, ModularIndexing(i1, 1, 256)),
          ranges=(16, 256, 32, 32),
          origins={primals_78, mul_17, unsqueeze_50, clone_8, view_27, primals_20, repeat_26, clone_13, primals_65, mul_26, repeat_18, unsqueeze_51, primals_77, sub_6, view_54, unsqueeze_52, clone_12, unsqueeze_53, unsqueeze_54, clone_9, primals_27, unsqueeze_55, primals_66, mul_27, add_15, add_14, clone_17, repeat_25, convolution_6, reciprocal_6, unsqueeze_67, view_32, view_43, reciprocal_8, primals_19, convolution_8, primals_21, sub_8, primals_14, unsqueeze_68, unsqueeze_69, unsqueeze_70, unsqueeze_71, add_20, mul_34, mul_35, add_19, repeat_17, repeat_33, unsqueeze_35, add_9, unsqueeze_66, mul_19, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, view_20, unsqueeze_37, unsqueeze_38, unsqueeze_49, mul_18, reciprocal_4, add_13, add_18, unsqueeze_34, primals_26, sqrt_6, unsqueeze_32, view_40, view_51, sqrt_4, primals_15, view_29, unsqueeze_48, unsqueeze_65, unsqueeze_33, clone_16, repeat_35, add_8, mul_25, mul_33, repeat_19, primals_13, view_38, view_49, primals_72, add_10, repeat_36, repeat_28, repeat_27, primals_25, repeat_20, primals_71, convolution_4, repeat_34}
        )), InputBuffer(name='primals_28', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
        constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={primals_28, primals_78, mul_17, unsqueeze_50, view_27, clone_8, primals_20, repeat_26, clone_13, primals_65, mul_26, repeat_18, unsqueeze_51, primals_77, sub_6, view_54, unsqueeze_52, clone_12, unsqueeze_53, unsqueeze_54, clone_9, primals_27, unsqueeze_55, primals_66, mul_27, add_15, add_14, clone_17, repeat_25, convolution_6, reciprocal_6, unsqueeze_67, view_32, view_43, reciprocal_8, convolution_9, primals_19, convolution_8, primals_21, sub_8, primals_14, unsqueeze_68, unsqueeze_69, unsqueeze_70, unsqueeze_71, add_20, mul_34, mul_35, add_19, repeat_17, repeat_33, unsqueeze_35, add_9, unsqueeze_66, mul_19, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, view_20, unsqueeze_37, unsqueeze_38, unsqueeze_49, mul_18, reciprocal_4, add_13, add_18, unsqueeze_34, primals_26, sqrt_6, unsqueeze_32, view_40, view_51, sqrt_4, primals_15, view_29, unsqueeze_48, unsqueeze_65, unsqueeze_33, clone_16, repeat_35, add_8, mul_25, mul_33, repeat_19, primals_13, view_38, view_49, primals_72, add_10, repeat_36, repeat_28, repeat_27, primals_25, repeat_20, primals_71, convolution_4, repeat_34}
      )
    ),
    size=(1, 4096, 32, 32),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
    origins={primals_28, primals_78, mul_17, unsqueeze_50, view_27, clone_8, primals_20, repeat_26, clone_13, primals_65, mul_26, repeat_18, unsqueeze_51, primals_77, sub_6, view_54, unsqueeze_52, clone_12, unsqueeze_53, view_55, unsqueeze_54, clone_9, primals_27, unsqueeze_55, primals_66, mul_27, add_15, add_14, clone_17, repeat_25, convolution_6, reciprocal_6, unsqueeze_67, view_32, view_43, reciprocal_8, convolution_9, primals_19, convolution_8, primals_21, sub_8, primals_14, unsqueeze_68, unsqueeze_69, unsqueeze_70, unsqueeze_71, add_20, mul_34, mul_35, add_19, repeat_17, repeat_33, unsqueeze_35, add_9, unsqueeze_66, mul_19, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, view_20, unsqueeze_37, unsqueeze_38, unsqueeze_49, mul_18, reciprocal_4, add_13, add_18, unsqueeze_34, primals_26, sqrt_6, unsqueeze_32, view_40, view_51, sqrt_4, primals_15, view_29, unsqueeze_48, unsqueeze_65, unsqueeze_33, clone_16, repeat_35, add_8, mul_25, mul_33, repeat_19, primals_13, view_38, view_49, primals_72, add_10, repeat_36, repeat_28, repeat_27, primals_25, repeat_20, primals_71, convolution_4, repeat_34}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf79', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf78, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)),
    ranges=(16, 256, 32, 32),
    origins={view_60}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf81', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_32, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={primals_32, repeat_41}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf82', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_83, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={clone_20, primals_83, repeat_43}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf83', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_84, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_44, primals_84, clone_21}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf80,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=(262144, 1024, 32, 1)),
        inputs=[ComputedBuffer(name='buf79', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf78, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)),
          ranges=(16, 256, 32, 32),
          origins={view_60}
        )), InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
        constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={view_60, convolution_10, primals_31}
      )
    ),
    size=(1, 4096, 32, 32),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
    origins={view_60, view_62, convolution_10, primals_31}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf86', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf71, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf80, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf82, i1 + 256 * i0) * reciprocal(sqrt(load(buf83, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf81, i1 + 256 * i0) + load(primals_33, ModularIndexing(i1, 1, 256)),
    ranges=(16, 256, 32, 32),
    origins={reciprocal_10, clone_21, primals_20, repeat_26, primals_65, add_25, view_54, repeat_42, convolution_10, clone_9, add_15, convolution_6, reciprocal_6, primals_32, mul_42, unsqueeze_67, add_24, view_32, mul_43, unsqueeze_87, unsqueeze_86, unsqueeze_85, sub_8, unsqueeze_68, unsqueeze_84, unsqueeze_69, sub_10, unsqueeze_70, unsqueeze_83, unsqueeze_71, unsqueeze_82, mul_34, unsqueeze_81, mul_35, sqrt_10, add_19, repeat_17, unsqueeze_80, repeat_33, unsqueeze_35, view_62, primals_33, add_9, unsqueeze_66, mul_19, add_23, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, unsqueeze_37, unsqueeze_38, mul_18, repeat_43, repeat_44, add_18, unsqueeze_34, primals_26, view_60, unsqueeze_32, view_51, sqrt_4, mul_41, view_29, unsqueeze_65, unsqueeze_33, clone_16, repeat_35, add_8, mul_33, repeat_19, primals_13, view_49, primals_72, repeat_36, repeat_20, repeat_34, primals_78, mul_17, unsqueeze_50, view_27, clone_8, clone_13, mul_26, repeat_18, unsqueeze_51, primals_77, sub_6, unsqueeze_52, clone_12, unsqueeze_53, unsqueeze_54, primals_27, unsqueeze_55, primals_66, mul_27, add_14, clone_17, repeat_25, view_43, reciprocal_8, primals_19, convolution_8, primals_21, primals_14, primals_83, add_20, primals_84, view_20, clone_20, unsqueeze_49, reciprocal_4, add_13, view_65, sqrt_6, view_40, primals_15, unsqueeze_48, mul_25, view_38, add_10, repeat_41, repeat_28, repeat_27, primals_31, primals_25, primals_71, convolution_4}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf88', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_35, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={primals_35, repeat_45}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf89', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_86, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={primals_86, clone_22, repeat_47}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf90', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_87, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_48, clone_23, primals_87}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf87,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=(262144, 1024, 32, 1)),
        inputs=[ComputedBuffer(name='buf86', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf71, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf80, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf82, i1 + 256 * i0) * reciprocal(sqrt(load(buf83, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf81, i1 + 256 * i0) + load(primals_33, ModularIndexing(i1, 1, 256)),
          ranges=(16, 256, 32, 32),
          origins={reciprocal_10, clone_21, primals_20, repeat_26, primals_65, add_25, view_54, repeat_42, convolution_10, clone_9, add_15, convolution_6, reciprocal_6, primals_32, mul_42, unsqueeze_67, add_24, view_32, mul_43, unsqueeze_87, unsqueeze_86, unsqueeze_85, sub_8, unsqueeze_68, unsqueeze_84, unsqueeze_69, sub_10, unsqueeze_70, unsqueeze_83, unsqueeze_71, unsqueeze_82, mul_34, unsqueeze_81, mul_35, sqrt_10, add_19, repeat_17, unsqueeze_80, repeat_33, unsqueeze_35, view_62, primals_33, add_9, unsqueeze_66, mul_19, add_23, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, unsqueeze_37, unsqueeze_38, mul_18, repeat_43, repeat_44, add_18, unsqueeze_34, primals_26, view_60, unsqueeze_32, view_51, sqrt_4, mul_41, view_29, unsqueeze_65, unsqueeze_33, clone_16, repeat_35, add_8, mul_33, repeat_19, primals_13, view_49, primals_72, repeat_36, repeat_20, repeat_34, primals_78, mul_17, unsqueeze_50, view_27, clone_8, clone_13, mul_26, repeat_18, unsqueeze_51, primals_77, sub_6, unsqueeze_52, clone_12, unsqueeze_53, unsqueeze_54, primals_27, unsqueeze_55, primals_66, mul_27, add_14, clone_17, repeat_25, view_43, reciprocal_8, primals_19, convolution_8, primals_21, primals_14, primals_83, add_20, primals_84, view_20, clone_20, unsqueeze_49, reciprocal_4, add_13, view_65, sqrt_6, view_40, primals_15, unsqueeze_48, mul_25, view_38, add_10, repeat_41, repeat_28, repeat_27, primals_31, primals_25, primals_71, convolution_4}
        )), InputBuffer(name='primals_34', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
        constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={reciprocal_10, clone_21, repeat_26, primals_20, primals_65, view_54, add_25, repeat_42, convolution_10, clone_9, add_15, convolution_6, reciprocal_6, primals_32, mul_42, unsqueeze_67, add_24, view_32, mul_43, unsqueeze_87, unsqueeze_86, unsqueeze_85, sub_8, unsqueeze_68, unsqueeze_84, sub_10, unsqueeze_69, unsqueeze_83, unsqueeze_70, unsqueeze_82, unsqueeze_71, mul_34, unsqueeze_81, mul_35, sqrt_10, add_19, repeat_17, unsqueeze_80, repeat_33, unsqueeze_35, view_62, primals_33, add_9, unsqueeze_66, mul_19, add_23, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, unsqueeze_37, unsqueeze_38, mul_18, repeat_43, repeat_44, add_18, unsqueeze_34, primals_26, view_60, unsqueeze_32, view_51, sqrt_4, mul_41, view_29, unsqueeze_65, unsqueeze_33, clone_16, repeat_35, add_8, primals_34, mul_33, repeat_19, primals_13, view_49, primals_72, repeat_36, repeat_20, repeat_34, primals_78, mul_17, unsqueeze_50, view_27, clone_8, clone_13, mul_26, repeat_18, unsqueeze_51, primals_77, sub_6, unsqueeze_52, clone_12, unsqueeze_53, unsqueeze_54, primals_27, unsqueeze_55, primals_66, mul_27, add_14, clone_17, repeat_25, view_43, reciprocal_8, primals_19, convolution_8, primals_21, primals_14, primals_83, add_20, primals_84, view_20, clone_20, unsqueeze_49, reciprocal_4, add_13, view_65, sqrt_6, view_40, primals_15, unsqueeze_48, mul_25, view_38, add_10, repeat_41, repeat_28, repeat_27, convolution_11, primals_31, primals_25, primals_71, convolution_4}
      )
    ),
    size=(1, 4096, 32, 32),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
    origins={reciprocal_10, clone_21, primals_20, repeat_26, primals_65, add_25, view_54, repeat_42, convolution_10, clone_9, add_15, convolution_6, reciprocal_6, primals_32, mul_42, unsqueeze_67, add_24, view_32, mul_43, unsqueeze_87, unsqueeze_86, unsqueeze_85, sub_8, unsqueeze_68, unsqueeze_84, unsqueeze_69, sub_10, unsqueeze_70, unsqueeze_83, unsqueeze_71, unsqueeze_82, mul_34, unsqueeze_81, mul_35, sqrt_10, add_19, repeat_17, unsqueeze_80, repeat_33, unsqueeze_35, view_62, primals_33, add_9, unsqueeze_66, mul_19, add_23, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, unsqueeze_37, unsqueeze_38, mul_18, repeat_43, repeat_44, add_18, unsqueeze_34, primals_26, view_60, unsqueeze_32, view_51, sqrt_4, mul_41, view_29, unsqueeze_65, unsqueeze_33, clone_16, repeat_35, add_8, primals_34, mul_33, repeat_19, primals_13, view_49, primals_72, repeat_36, repeat_20, repeat_34, primals_78, mul_17, unsqueeze_50, view_27, clone_8, clone_13, mul_26, repeat_18, unsqueeze_51, primals_77, sub_6, unsqueeze_52, clone_12, unsqueeze_53, unsqueeze_54, primals_27, unsqueeze_55, primals_66, mul_27, add_14, clone_17, repeat_25, view_43, reciprocal_8, primals_19, convolution_8, primals_21, primals_14, primals_83, add_20, primals_84, view_20, clone_20, unsqueeze_49, reciprocal_4, add_13, view_65, sqrt_6, view_40, primals_15, unsqueeze_48, view_66, mul_25, view_38, add_10, repeat_41, repeat_28, repeat_27, convolution_11, primals_31, primals_25, primals_71, convolution_4}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf94', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf93, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)),
    ranges=(16, 256, 32, 32),
    origins={view_71}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf96', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_38, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={primals_38, repeat_49}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf97', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_89, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_51, clone_24, primals_89}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf98', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_90, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={primals_90, clone_25, repeat_52}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf95,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=(262144, 1024, 32, 1)),
        inputs=[ComputedBuffer(name='buf94', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf93, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)),
          ranges=(16, 256, 32, 32),
          origins={view_71}
        )), InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
        constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={convolution_12, view_71, primals_37}
      )
    ),
    size=(1, 4096, 32, 32),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
    origins={view_73, view_71, primals_37, convolution_12}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf101', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf86, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf95, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf97, i1 + 256 * i0) * reciprocal(sqrt(load(buf98, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf96, i1 + 256 * i0) + load(primals_39, ModularIndexing(i1, 1, 256)),
    ranges=(16, 256, 32, 32),
    origins={reciprocal_10, clone_21, repeat_26, primals_20, primals_65, view_54, add_25, repeat_42, convolution_10, clone_9, add_15, clone_25, convolution_6, reciprocal_6, primals_32, mul_42, unsqueeze_67, add_24, view_32, mul_43, unsqueeze_87, unsqueeze_86, unsqueeze_85, sub_8, unsqueeze_68, unsqueeze_84, sub_10, unsqueeze_69, unsqueeze_83, unsqueeze_70, unsqueeze_82, unsqueeze_71, mul_34, unsqueeze_81, mul_35, sqrt_10, add_19, repeat_17, unsqueeze_80, repeat_33, unsqueeze_35, view_62, primals_33, add_9, unsqueeze_66, mul_19, add_23, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, unsqueeze_37, unsqueeze_38, primals_38, mul_18, repeat_43, repeat_44, add_18, unsqueeze_34, primals_26, view_60, view_76, unsqueeze_32, view_51, sqrt_4, mul_41, view_29, repeat_49, unsqueeze_65, mul_50, unsqueeze_33, mul_51, clone_16, add_29, repeat_35, add_8, mul_33, repeat_19, primals_13, view_49, primals_72, repeat_36, unsqueeze_97, unsqueeze_103, unsqueeze_102, repeat_20, unsqueeze_100, unsqueeze_98, unsqueeze_99, repeat_34, sub_12, unsqueeze_101, primals_78, mul_17, unsqueeze_50, view_27, clone_8, unsqueeze_96, clone_13, view_73, add_28, mul_26, repeat_18, unsqueeze_51, primals_77, sub_6, sqrt_12, unsqueeze_52, clone_12, unsqueeze_53, view_71, unsqueeze_54, primals_27, unsqueeze_55, primals_66, mul_27, add_14, clone_17, repeat_52, primals_89, repeat_25, repeat_51, mul_49, view_43, reciprocal_8, primals_90, primals_19, convolution_8, primals_21, primals_14, primals_83, repeat_50, add_20, primals_84, view_20, clone_20, add_30, unsqueeze_49, reciprocal_4, primals_39, add_13, view_65, sqrt_6, reciprocal_12, view_40, primals_15, unsqueeze_48, convolution_12, clone_24, mul_25, view_38, add_10, repeat_41, repeat_28, repeat_27, primals_31, primals_37, primals_25, primals_71, convolution_4}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf103', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_41, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_53, primals_41}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf104', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_92, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={clone_26, primals_92, repeat_55}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf105', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_93, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={clone_27, primals_93, repeat_56}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf102,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=(262144, 1024, 32, 1)),
        inputs=[ComputedBuffer(name='buf101', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf86, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf95, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf97, i1 + 256 * i0) * reciprocal(sqrt(load(buf98, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf96, i1 + 256 * i0) + load(primals_39, ModularIndexing(i1, 1, 256)),
          ranges=(16, 256, 32, 32),
          origins={reciprocal_10, clone_21, repeat_26, primals_20, primals_65, view_54, add_25, repeat_42, convolution_10, clone_9, add_15, clone_25, convolution_6, reciprocal_6, primals_32, mul_42, unsqueeze_67, add_24, view_32, mul_43, unsqueeze_87, unsqueeze_86, unsqueeze_85, sub_8, unsqueeze_68, unsqueeze_84, sub_10, unsqueeze_69, unsqueeze_83, unsqueeze_70, unsqueeze_82, unsqueeze_71, mul_34, unsqueeze_81, mul_35, sqrt_10, add_19, repeat_17, unsqueeze_80, repeat_33, unsqueeze_35, view_62, primals_33, add_9, unsqueeze_66, mul_19, add_23, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, unsqueeze_37, unsqueeze_38, primals_38, mul_18, repeat_43, repeat_44, add_18, unsqueeze_34, primals_26, view_60, view_76, unsqueeze_32, view_51, sqrt_4, mul_41, view_29, repeat_49, unsqueeze_65, mul_50, unsqueeze_33, mul_51, clone_16, add_29, repeat_35, add_8, mul_33, repeat_19, primals_13, view_49, primals_72, repeat_36, unsqueeze_97, unsqueeze_103, unsqueeze_102, repeat_20, unsqueeze_100, unsqueeze_98, unsqueeze_99, repeat_34, sub_12, unsqueeze_101, primals_78, mul_17, unsqueeze_50, view_27, clone_8, unsqueeze_96, clone_13, view_73, add_28, mul_26, repeat_18, unsqueeze_51, primals_77, sub_6, sqrt_12, unsqueeze_52, clone_12, unsqueeze_53, view_71, unsqueeze_54, primals_27, unsqueeze_55, primals_66, mul_27, add_14, clone_17, repeat_52, primals_89, repeat_25, repeat_51, mul_49, view_43, reciprocal_8, primals_90, primals_19, convolution_8, primals_21, primals_14, primals_83, repeat_50, add_20, primals_84, view_20, clone_20, add_30, unsqueeze_49, reciprocal_4, primals_39, add_13, view_65, sqrt_6, reciprocal_12, view_40, primals_15, unsqueeze_48, convolution_12, clone_24, mul_25, view_38, add_10, repeat_41, repeat_28, repeat_27, primals_31, primals_37, primals_25, primals_71, convolution_4}
        )), InputBuffer(name='primals_40', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
        constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={reciprocal_10, clone_21, primals_20, repeat_26, primals_65, add_25, view_54, repeat_42, convolution_10, clone_9, add_15, clone_25, convolution_6, reciprocal_6, primals_32, mul_42, unsqueeze_67, add_24, view_32, mul_43, unsqueeze_87, unsqueeze_86, unsqueeze_85, sub_8, unsqueeze_68, unsqueeze_84, unsqueeze_69, sub_10, unsqueeze_70, unsqueeze_83, unsqueeze_71, unsqueeze_82, mul_34, unsqueeze_81, mul_35, sqrt_10, add_19, repeat_17, unsqueeze_80, repeat_33, unsqueeze_35, view_62, primals_33, add_9, unsqueeze_66, mul_19, add_23, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, unsqueeze_37, unsqueeze_38, primals_38, mul_18, repeat_43, repeat_44, add_18, unsqueeze_34, primals_26, view_60, view_76, unsqueeze_32, view_51, sqrt_4, mul_41, view_29, repeat_49, unsqueeze_65, mul_50, unsqueeze_33, mul_51, clone_16, add_29, repeat_35, add_8, mul_33, repeat_19, primals_13, convolution_13, view_49, primals_72, repeat_36, unsqueeze_97, unsqueeze_103, unsqueeze_102, repeat_20, unsqueeze_100, unsqueeze_98, unsqueeze_99, repeat_34, sub_12, unsqueeze_101, primals_78, mul_17, unsqueeze_50, view_27, clone_8, unsqueeze_96, clone_13, view_73, add_28, mul_26, repeat_18, primals_40, unsqueeze_51, primals_77, sub_6, sqrt_12, unsqueeze_52, clone_12, unsqueeze_53, view_71, unsqueeze_54, primals_27, unsqueeze_55, primals_66, mul_27, add_14, clone_17, repeat_52, primals_89, repeat_25, repeat_51, mul_49, view_43, reciprocal_8, primals_90, primals_19, convolution_8, primals_21, primals_14, primals_83, repeat_50, add_20, primals_84, view_20, clone_20, add_30, unsqueeze_49, reciprocal_4, primals_39, add_13, view_65, sqrt_6, reciprocal_12, view_40, primals_15, unsqueeze_48, convolution_12, clone_24, mul_25, view_38, add_10, repeat_41, repeat_28, repeat_27, primals_31, primals_37, primals_25, primals_71, convolution_4}
      )
    ),
    size=(1, 4096, 32, 32),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
    origins={reciprocal_10, clone_21, repeat_26, primals_20, primals_65, view_54, add_25, repeat_42, convolution_10, clone_9, add_15, clone_25, convolution_6, reciprocal_6, primals_32, mul_42, unsqueeze_67, add_24, view_32, mul_43, unsqueeze_87, unsqueeze_86, unsqueeze_85, sub_8, unsqueeze_68, unsqueeze_84, sub_10, unsqueeze_69, unsqueeze_83, unsqueeze_70, unsqueeze_82, unsqueeze_71, mul_34, unsqueeze_81, mul_35, sqrt_10, add_19, repeat_17, unsqueeze_80, repeat_33, unsqueeze_35, view_62, primals_33, view_77, add_9, unsqueeze_66, mul_19, add_23, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, unsqueeze_37, unsqueeze_38, primals_38, mul_18, repeat_43, repeat_44, add_18, unsqueeze_34, primals_26, view_60, view_76, unsqueeze_32, view_51, sqrt_4, mul_41, view_29, repeat_49, unsqueeze_65, mul_50, unsqueeze_33, mul_51, clone_16, add_29, repeat_35, add_8, mul_33, repeat_19, primals_13, convolution_13, view_49, primals_72, repeat_36, unsqueeze_97, unsqueeze_103, unsqueeze_102, repeat_20, unsqueeze_100, unsqueeze_98, unsqueeze_99, repeat_34, sub_12, unsqueeze_101, primals_78, mul_17, unsqueeze_50, view_27, clone_8, unsqueeze_96, clone_13, view_73, add_28, mul_26, repeat_18, primals_40, unsqueeze_51, primals_77, sub_6, sqrt_12, unsqueeze_52, clone_12, unsqueeze_53, view_71, unsqueeze_54, primals_27, unsqueeze_55, primals_66, mul_27, add_14, clone_17, repeat_52, primals_89, repeat_25, repeat_51, mul_49, view_43, reciprocal_8, primals_90, primals_19, convolution_8, primals_21, primals_14, primals_83, repeat_50, add_20, primals_84, view_20, clone_20, add_30, unsqueeze_49, reciprocal_4, primals_39, add_13, view_65, sqrt_6, reciprocal_12, view_40, primals_15, unsqueeze_48, convolution_12, clone_24, mul_25, view_38, add_10, repeat_41, repeat_28, repeat_27, primals_31, primals_37, primals_25, primals_71, convolution_4}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf109', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf108, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)),
    ranges=(16, 256, 32, 32),
    origins={view_82}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf111', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_44, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={repeat_57, primals_44}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf112', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_95, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={primals_95, repeat_59, clone_28}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf113', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_96, ModularIndexing(i0, 1, 256)),
    ranges=[256*s0],
    origins={primals_96, clone_29, repeat_60}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf110,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=(262144, 1024, 32, 1)),
        inputs=[ComputedBuffer(name='buf109', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf108, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)),
          ranges=(16, 256, 32, 32),
          origins={view_82}
        )), InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
        constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={convolution_14, view_82, primals_43}
      )
    ),
    size=(1, 4096, 32, 32),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
    origins={convolution_14, primals_43, view_82, view_84}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf116', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf101, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf110, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf112, i1 + 256 * i0) * reciprocal(sqrt(load(buf113, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf111, i1 + 256 * i0) + load(primals_45, ModularIndexing(i1, 1, 256)),
    ranges=(16, 256, 32, 32),
    origins={reciprocal_10, clone_21, primals_20, repeat_26, add_33, sqrt_14, primals_65, mul_57, add_25, view_54, repeat_42, convolution_10, clone_9, view_82, add_15, repeat_60, repeat_59, repeat_58, clone_25, convolution_6, add_35, reciprocal_6, primals_32, mul_42, unsqueeze_67, add_24, view_32, mul_43, unsqueeze_87, unsqueeze_86, unsqueeze_85, sub_8, unsqueeze_68, unsqueeze_84, unsqueeze_69, sub_10, unsqueeze_70, unsqueeze_83, unsqueeze_71, unsqueeze_82, mul_34, unsqueeze_81, mul_35, sqrt_10, add_19, repeat_17, unsqueeze_80, convolution_14, repeat_33, unsqueeze_35, view_62, primals_33, add_9, unsqueeze_66, mul_19, add_23, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, reciprocal_14, unsqueeze_37, unsqueeze_38, primals_38, mul_18, repeat_43, repeat_44, add_18, unsqueeze_34, primals_26, view_60, view_76, unsqueeze_32, view_51, sqrt_4, mul_41, view_29, repeat_49, unsqueeze_65, mul_50, unsqueeze_33, mul_51, clone_16, add_29, repeat_35, add_8, mul_33, repeat_19, primals_13, view_49, primals_72, repeat_36, unsqueeze_97, unsqueeze_103, unsqueeze_102, repeat_20, unsqueeze_100, clone_28, unsqueeze_98, unsqueeze_99, repeat_34, sub_12, unsqueeze_101, primals_78, mul_17, unsqueeze_50, view_27, clone_8, unsqueeze_96, clone_13, view_73, add_28, primals_44, mul_26, repeat_18, unsqueeze_51, primals_77, sub_6, sqrt_12, unsqueeze_52, clone_12, primals_43, unsqueeze_53, view_71, unsqueeze_54, primals_27, unsqueeze_55, primals_66, mul_27, add_14, clone_17, repeat_52, primals_89, repeat_25, repeat_51, mul_49, primals_96, view_43, primals_45, reciprocal_8, primals_90, primals_19, convolution_8, primals_21, primals_14, primals_83, repeat_50, add_20, view_87, primals_84, clone_29, view_20, clone_20, add_30, unsqueeze_49, reciprocal_4, primals_39, add_13, view_65, sqrt_6, reciprocal_12, unsqueeze_118, view_40, primals_15, repeat_57, unsqueeze_48, unsqueeze_119, mul_58, mul_59, convolution_12, add_34, clone_24, mul_25, unsqueeze_117, view_38, add_10, repeat_41, repeat_28, unsqueeze_116, unsqueeze_115, unsqueeze_112, primals_95, repeat_27, primals_31, unsqueeze_113, primals_37, unsqueeze_114, primals_25, primals_71, sub_14, convolution_4, view_84}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf118', layout=FixedLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_47, ModularIndexing(i0, 1, 128)),
    ranges=[128*s0],
    origins={repeat_61, primals_47}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf119', layout=FixedLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_98, ModularIndexing(i0, 1, 128)),
    ranges=[128*s0],
    origins={primals_98, clone_30, repeat_63}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf120', layout=FixedLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_99, ModularIndexing(i0, 1, 128)),
    ranges=[128*s0],
    origins={clone_31, primals_99, repeat_64}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf117,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 128, 64, 64]), stride=(524288, 4096, 64, 1)),
        inputs=[ComputedBuffer(name='buf116', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf101, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf110, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf112, i1 + 256 * i0) * reciprocal(sqrt(load(buf113, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf111, i1 + 256 * i0) + load(primals_45, ModularIndexing(i1, 1, 256)),
          ranges=(16, 256, 32, 32),
          origins={reciprocal_10, clone_21, primals_20, repeat_26, add_33, sqrt_14, primals_65, mul_57, add_25, view_54, repeat_42, convolution_10, clone_9, view_82, add_15, repeat_60, repeat_59, repeat_58, clone_25, convolution_6, add_35, reciprocal_6, primals_32, mul_42, unsqueeze_67, add_24, view_32, mul_43, unsqueeze_87, unsqueeze_86, unsqueeze_85, sub_8, unsqueeze_68, unsqueeze_84, unsqueeze_69, sub_10, unsqueeze_70, unsqueeze_83, unsqueeze_71, unsqueeze_82, mul_34, unsqueeze_81, mul_35, sqrt_10, add_19, repeat_17, unsqueeze_80, convolution_14, repeat_33, unsqueeze_35, view_62, primals_33, add_9, unsqueeze_66, mul_19, add_23, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, reciprocal_14, unsqueeze_37, unsqueeze_38, primals_38, mul_18, repeat_43, repeat_44, add_18, unsqueeze_34, primals_26, view_60, view_76, unsqueeze_32, view_51, sqrt_4, mul_41, view_29, repeat_49, unsqueeze_65, mul_50, unsqueeze_33, mul_51, clone_16, add_29, repeat_35, add_8, mul_33, repeat_19, primals_13, view_49, primals_72, repeat_36, unsqueeze_97, unsqueeze_103, unsqueeze_102, repeat_20, unsqueeze_100, clone_28, unsqueeze_98, unsqueeze_99, repeat_34, sub_12, unsqueeze_101, primals_78, mul_17, unsqueeze_50, view_27, clone_8, unsqueeze_96, clone_13, view_73, add_28, primals_44, mul_26, repeat_18, unsqueeze_51, primals_77, sub_6, sqrt_12, unsqueeze_52, clone_12, primals_43, unsqueeze_53, view_71, unsqueeze_54, primals_27, unsqueeze_55, primals_66, mul_27, add_14, clone_17, repeat_52, primals_89, repeat_25, repeat_51, mul_49, primals_96, view_43, primals_45, reciprocal_8, primals_90, primals_19, convolution_8, primals_21, primals_14, primals_83, repeat_50, add_20, view_87, primals_84, clone_29, view_20, clone_20, add_30, unsqueeze_49, reciprocal_4, primals_39, add_13, view_65, sqrt_6, reciprocal_12, unsqueeze_118, view_40, primals_15, repeat_57, unsqueeze_48, unsqueeze_119, mul_58, mul_59, convolution_12, add_34, clone_24, mul_25, unsqueeze_117, view_38, add_10, repeat_41, repeat_28, unsqueeze_116, unsqueeze_115, unsqueeze_112, primals_95, repeat_27, primals_31, unsqueeze_113, primals_37, unsqueeze_114, primals_25, primals_71, sub_14, convolution_4, view_84}
        )), InputBuffer(name='primals_46', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 4, 4], stride=[2048, 16, 4, 1]))],
        constant_args=(None, (2, 2), (1, 1), (1, 1), True, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={reciprocal_10, clone_21, repeat_26, add_33, primals_20, sqrt_14, primals_65, mul_57, view_54, add_25, repeat_42, convolution_10, clone_9, view_82, add_15, repeat_60, repeat_59, repeat_58, clone_25, convolution_6, add_35, reciprocal_6, primals_32, mul_42, unsqueeze_67, add_24, view_32, mul_43, unsqueeze_87, unsqueeze_86, unsqueeze_85, sub_8, unsqueeze_68, unsqueeze_84, sub_10, unsqueeze_69, unsqueeze_83, unsqueeze_70, unsqueeze_82, unsqueeze_71, mul_34, unsqueeze_81, mul_35, sqrt_10, add_19, repeat_17, unsqueeze_80, convolution_14, repeat_33, unsqueeze_35, view_62, primals_33, add_9, unsqueeze_66, mul_19, add_23, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, reciprocal_14, unsqueeze_37, unsqueeze_38, primals_38, mul_18, repeat_43, repeat_44, add_18, unsqueeze_34, primals_26, view_60, view_76, unsqueeze_32, view_51, sqrt_4, mul_41, view_29, repeat_49, unsqueeze_65, mul_50, unsqueeze_33, mul_51, clone_16, add_29, repeat_35, add_8, mul_33, repeat_19, primals_13, view_49, primals_72, repeat_36, unsqueeze_97, unsqueeze_103, unsqueeze_102, repeat_20, unsqueeze_100, clone_28, unsqueeze_98, unsqueeze_99, repeat_34, sub_12, unsqueeze_101, primals_78, mul_17, unsqueeze_50, view_27, clone_8, unsqueeze_96, clone_13, view_73, add_28, primals_44, mul_26, repeat_18, unsqueeze_51, primals_77, sub_6, sqrt_12, unsqueeze_52, clone_12, primals_43, unsqueeze_53, view_71, unsqueeze_54, primals_27, unsqueeze_55, primals_66, mul_27, add_14, clone_17, repeat_52, primals_89, repeat_25, repeat_51, primals_46, mul_49, primals_96, view_43, primals_45, reciprocal_8, primals_90, primals_19, convolution_8, primals_21, primals_14, primals_83, repeat_50, add_20, view_87, primals_84, clone_29, view_20, clone_20, add_30, unsqueeze_49, convolution_15, reciprocal_4, primals_39, add_13, view_65, sqrt_6, reciprocal_12, unsqueeze_118, view_40, primals_15, repeat_57, unsqueeze_48, unsqueeze_119, mul_58, mul_59, convolution_12, add_34, clone_24, mul_25, unsqueeze_117, view_38, add_10, repeat_41, repeat_28, unsqueeze_116, unsqueeze_115, unsqueeze_112, primals_95, repeat_27, primals_31, unsqueeze_113, primals_37, unsqueeze_114, primals_25, primals_71, sub_14, convolution_4, view_84}
      )
    ),
    size=(1, 2048, 64, 64),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 128, 16), ModularIndexing(i1, 1, 128), i2, i3],
    origins={reciprocal_10, clone_21, add_33, repeat_26, primals_20, sqrt_14, primals_65, mul_57, add_25, view_54, repeat_42, convolution_10, clone_9, view_82, add_15, repeat_60, repeat_59, repeat_58, clone_25, convolution_6, add_35, reciprocal_6, primals_32, mul_42, unsqueeze_67, add_24, view_32, mul_43, unsqueeze_87, unsqueeze_86, unsqueeze_85, sub_8, unsqueeze_68, unsqueeze_84, unsqueeze_69, sub_10, unsqueeze_70, unsqueeze_83, unsqueeze_71, unsqueeze_82, mul_34, unsqueeze_81, mul_35, sqrt_10, add_19, repeat_17, unsqueeze_80, convolution_14, repeat_33, unsqueeze_35, view_62, primals_33, add_9, unsqueeze_66, mul_19, add_23, unsqueeze_39, unsqueeze_64, sub_4, sqrt_8, unsqueeze_36, reciprocal_14, unsqueeze_37, unsqueeze_38, primals_38, mul_18, repeat_43, repeat_44, add_18, unsqueeze_34, primals_26, view_60, view_76, unsqueeze_32, view_51, sqrt_4, mul_41, view_29, repeat_49, unsqueeze_65, mul_50, unsqueeze_33, mul_51, clone_16, add_29, repeat_35, add_8, mul_33, repeat_19, primals_13, view_49, primals_72, repeat_36, unsqueeze_97, unsqueeze_103, unsqueeze_102, repeat_20, unsqueeze_100, clone_28, unsqueeze_98, unsqueeze_99, repeat_34, sub_12, unsqueeze_101, primals_78, mul_17, unsqueeze_50, view_27, clone_8, unsqueeze_96, clone_13, view_73, add_28, primals_44, mul_26, repeat_18, unsqueeze_51, primals_77, sub_6, sqrt_12, unsqueeze_52, clone_12, primals_43, unsqueeze_53, view_71, unsqueeze_54, primals_27, unsqueeze_55, primals_66, mul_27, add_14, clone_17, repeat_52, primals_89, repeat_25, repeat_51, primals_46, mul_49, primals_96, view_43, primals_45, reciprocal_8, primals_90, primals_19, convolution_8, primals_21, primals_14, primals_83, repeat_50, add_20, view_87, primals_84, view_88, clone_29, view_20, clone_20, add_30, unsqueeze_49, convolution_15, reciprocal_4, primals_39, add_13, view_65, sqrt_6, reciprocal_12, unsqueeze_118, view_40, primals_15, repeat_57, unsqueeze_48, unsqueeze_119, mul_58, mul_59, convolution_12, add_34, clone_24, mul_25, unsqueeze_117, view_38, add_10, repeat_41, repeat_28, unsqueeze_116, unsqueeze_115, unsqueeze_112, primals_95, repeat_27, primals_31, unsqueeze_113, primals_37, unsqueeze_114, primals_25, primals_71, sub_14, convolution_4, view_84}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf124', layout=FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf123, i3 + 64 * i2 + 4096 * ModularIndexing(i1, 1, 128) + 524288 * ModularIndexing(i1 + 128 * i0, 128, 16)),
    ranges=(16, 128, 64, 64),
    origins={view_93}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf126', layout=FixedLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_50, ModularIndexing(i0, 1, 64)),
    ranges=[64*s0],
    origins={primals_50, repeat_65}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf127', layout=FixedLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_101, ModularIndexing(i0, 1, 64)),
    ranges=[64*s0],
    origins={repeat_67, clone_32, primals_101}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf128', layout=FixedLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(primals_102, ModularIndexing(i0, 1, 64)),
    ranges=[64*s0],
    origins={repeat_68, primals_102, clone_33}
  ))
)), TensorBox(
  View(
    StorageBox(
      Convolution(
        name=buf125,
        layout=FixedLayout('cuda', torch.float32, size=torch.Size([16, 64, 128, 128]), stride=(1048576, 16384, 128, 1)),
        inputs=[ComputedBuffer(name='buf124', layout=FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]), data=Pointwise(
          'cuda',
          torch.float32,
          load(buf123, i3 + 64 * i2 + 4096 * ModularIndexing(i1, 1, 128) + 524288 * ModularIndexing(i1 + 128 * i0, 128, 16)),
          ranges=(16, 128, 64, 64),
          origins={view_93}
        )), InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 4, 4], stride=[1024, 16, 4, 1]))],
        constant_args=(None, (2, 2), (1, 1), (1, 1), True, (0, 0), 1),
        kwargs={},
        output_view=None,
        origins={primals_49, view_93, convolution_16}
      )
    ),
    size=(1, 1024, 128, 128),
    reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 64, 16), ModularIndexing(i1, 1, 64), i2, i3],
    origins={primals_49, view_95, view_93, convolution_16}
  )
), TensorBox(StorageBox(
  ComputedBuffer(name='buf132', layout=FixedLayout('cuda', torch.float32, size=(16, 64, 128, 128), stride=[1048576, 16384, 128, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf131, i3 + 128 * i2 + 16384 * ModularIndexing(i1, 1, 64) + 1048576 * ModularIndexing(i1 + 64 * i0, 64, 16)),
    ranges=(16, 64, 128, 128),
    origins={view_100}
  ))
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.float32,
    reciprocal(exp(load(buf133, i3 + 128 * i2 + 16384 * i1 + 49152 * i0) * constant(-2.0, torch.float32)) + constant(1.0, torch.float32)) * constant(2.0, torch.float32) - constant(1.0, torch.float32),
    ranges=torch.Size([16, 3, 128, 128]),
    origins={mul_68, mul_69, reciprocal_17, primals_52, view_100, sub_17, add_40, convolution_17, exp}
  )
)), s0, 128, 128, 1024, 64, 64, 2048, 32, 32, 4096, 32, 32, 4096, 32, 32, 32, 32, 4096, 32, 32, 32, 32, 4096, 32, 32, 32, 32, 4096, 32, 32, 32, 32, 4096, 32, 32, 32, 32, 4096, 32, 32, 64, 64, 2048, 128, 128, 1024]

While executing return [alias_1, alias_4, alias_7, alias_10, alias_13, alias_16, alias_19, alias_22, alias_25, alias_28, alias_31, alias_34, alias_37, alias_40, alias_43, alias_46, alias_49, alias_52, alias_55, alias_58, alias_61, alias_64, alias_67, alias_70, alias_73, alias_76, alias_79, alias_82, alias_85, alias_88, alias_91, alias_94, alias_97, alias_100, sub_17, primals_1, primals_4, primals_7, primals_10, primals_13, primals_16, primals_19, primals_22, primals_25, primals_28, primals_31, primals_34, primals_37, primals_40, primals_43, primals_46, primals_49, primals_52, cat, repeat_1, repeat_3, repeat_4, view_1, view_6, repeat_5, repeat_7, repeat_8, view_8, view_13, repeat_9, repeat_11, repeat_12, view_15, view_20, repeat_13, repeat_15, repeat_16, view_22, view_27, repeat_17, repeat_19, repeat_20, view_29, add_10, repeat_21, repeat_23, repeat_24, view_33, view_38, repeat_25, repeat_27, repeat_28, view_40, add_15, repeat_29, repeat_31, repeat_32, view_44, view_49, repeat_33, repeat_35, repeat_36, view_51, add_20, repeat_37, repeat_39, repeat_40, view_55, view_60, repeat_41, repeat_43, repeat_44, view_62, add_25, repeat_45, repeat_47, repeat_48, view_66, view_71, repeat_49, repeat_51, repeat_52, view_73, add_30, repeat_53, repeat_55, repeat_56, view_77, view_82, repeat_57, repeat_59, repeat_60, view_84, add_35, repeat_61, repeat_63, repeat_64, view_88, view_93, repeat_65, repeat_67, repeat_68, view_95, view_100, sub_17, sym_size_4, sym_size_5, sym_size_6, sym_size_7, sym_size_8, sym_size_9, sym_size_10, sym_size_11, sym_size_12, sym_size_13, sym_size_14, sym_size_15, sym_size_16, sym_size_17, sym_size_18, sym_size_19, sym_size_20, sym_size_21, sym_size_22, sym_size_23, sym_size_24, sym_size_25, sym_size_26, sym_size_27, sym_size_28, sym_size_29, sym_size_30, sym_size_31, sym_size_32, sym_size_33, sym_size_34, sym_size_35, sym_size_36, sym_size_37, sym_size_38, sym_size_39, sym_size_40, sym_size_41, sym_size_42, sym_size_43, sym_size_44, sym_size_45, sym_size_46, sym_size_47, sym_size_48, sym_size_49]
ERROR:common:compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 472, in call_user_compiler
    assert callable(compiled_fn), "compiler_fn did not return callable"
AssertionError: compiler_fn did not return callable

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1679, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 386, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 432, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 475, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
cuda train pytorch_stargan                    FAIL
Running torchbench.py pytorch_struct...
ERROR:common:'SymInt' object has no attribute 'size'
Traceback (most recent call last):
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/sympy-1.11.1-py3.10.egg/sympy/core/numbers.py", line 2095, in __new__
    ival = int(i)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/sympy-1.11.1-py3.10.egg/sympy/core/expr.py", line 320, in __int__
    raise TypeError("Cannot convert symbols to int")
TypeError: Cannot convert symbols to int

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 254, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/lowering.py", line 225, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/lowering.py", line 1714, in _new_constant
    size = [sympy.Integer(s) for s in size]
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/lowering.py", line 1714, in <listcomp>
    size = [sympy.Integer(s) for s in size]
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/sympy-1.11.1-py3.10.egg/sympy/core/cache.py", line 70, in wrapper
    retval = cfunc(*args, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/sympy-1.11.1-py3.10.egg/sympy/core/numbers.py", line 2097, in __new__
    raise TypeError(
TypeError: Argument of Integer should be of numeric type, got s0.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 321, in run_node
    result = super().run_node(n)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 257, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: TypeError: Argument of Integer should be of numeric type, got s0.
  target: aten.new_zeros.default
  args[0]: TensorBox(
    ReinterpretView(
      StorageBox(
        InputBuffer(name='tangents_1', layout=FixedLayout('cuda', torch.float32, size=[s0, s1, 30], stride=[30*s1, 30, 1]))
      ),
      FixedLayout('cuda', torch.float32, size=[s0, s1, 30, 1], stride=[30*s1, 30, 1, 1]),
      no origins?
    )
  )
  args[1]: [s0, s1, 30, 4771]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 338, in <graph break in forward_and_backward_pass>
    self.grad_scaler.scale(loss).backward()
  File "/scratch/ezyang/work/b/pytorch/torch/_tensor.py", line 473, in backward
    torch.autograd.backward(
  File "/scratch/ezyang/work/b/pytorch/torch/autograd/__init__.py", line 197, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  File "/scratch/ezyang/work/b/pytorch/torch/autograd/function.py", line 270, in apply
    return user_fn(self, *args)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1032, in backward
    CompiledFunction.compiled_bw = aot_config.bw_compiler(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 375, in bw_compiler
    return compile_fx_inner(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/debug_utils.py", line 473, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/debug.py", line 177, in inner
    return fn(*args, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 122, in compile_fx_inner
    graph.run(*example_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 140, in run
    return super().run(*args)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 132, in run
    msg = f"While executing {node.format_node(detailed=True)}"
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 476, in format_node
    f'args = {_format_arg(self.args, detailed=detailed)}, kwargs = {_format_arg(self.kwargs, detailed=detailed)})'
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 85, in _format_arg
    items = ', '.join(_format_arg(a, detailed=detailed) for idx, a in enumerate(arg) if idx < max_list_len)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 85, in <genexpr>
    items = ', '.join(_format_arg(a, detailed=detailed) for idx, a in enumerate(arg) if idx < max_list_len)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 81, in _format_arg
    items = ', '.join(_format_arg(a, detailed=detailed) for idx, a in enumerate(arg) if idx < max_list_len)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 81, in <genexpr>
    items = ', '.join(_format_arg(a, detailed=detailed) for idx, a in enumerate(arg) if idx < max_list_len)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 97, in _format_arg
    return f"%{arg} : Tensor[size={list(a.size())}, stride={list(a.stride())}]"
AttributeError: 'SymInt' object has no attribute 'size'
TorchDynamo optimized model failed to run because of following error
cuda train pytorch_struct                     FAIL
Running torchbench.py pytorch_unet...
[2022-11-19 14:58:32,786] torch._inductor.graph: [WARNING] Creating implicit fallback for:
  target: <function floor at 0x7fa274cc2ef0>
  args[0]: 80.0
[2022-11-19 14:58:32,788] torch._dynamo.optimizations.backends: [ERROR] aot_autograd error
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 254, in call_function
    out = lowerings[target](*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/lowering.py", line 225, in wrapped
    return decomp_fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/lowering.py", line 1033, in handler
    TensorBox.create, ir.FallbackKernel.create(kernel, *args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/ir.py", line 3002, in create
    ) = cls.process_kernel(kernel, *args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/ir.py", line 2396, in process_kernel
    example_output = kernel(*new_args, **new_kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/_symbolic_trace.py", line 813, in wrapped
    return orig_fn(*args, **kwargs)
TypeError: math.floor() takes exactly one argument (0 given)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 321, in run_node
    result = super().run_node(n)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 257, in call_function
    raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: TypeError: math.floor() takes exactly one argument (0 given)
  target: <function floor at 0x7fa274cc2ef0>
  args[0]: 80.0

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 53, in inner
    return fn(model, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 563, in aot_autograd
    return aot_module_simplified(subgraph.model, subgraph.example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1503, in aot_module_simplified
    aot_dispatcher_function = _create_aot_dispatcher_function(functional_call, joined_args, aot_config, fake_mode)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1191, in _create_aot_dispatcher_function
    aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 941, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args_with_views_handled)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 364, in fw_compiler
    return compile_fx_inner(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/debug_utils.py", line 473, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/debug.py", line 177, in inner
    return fn(*args, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 122, in compile_fx_inner
    graph.run(*example_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 140, in run
    return super().run(*args)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 132, in run
    msg = f"While executing {node.format_node(detailed=True)}"
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 476, in format_node
    f'args = {_format_arg(self.args, detailed=detailed)}, kwargs = {_format_arg(self.kwargs, detailed=detailed)})'
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 85, in _format_arg
    items = ', '.join(_format_arg(a, detailed=detailed) for idx, a in enumerate(arg) if idx < max_list_len)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 85, in <genexpr>
    items = ', '.join(_format_arg(a, detailed=detailed) for idx, a in enumerate(arg) if idx < max_list_len)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/node.py", line 97, in _format_arg
    return f"%{arg} : Tensor[size={list(a.size())}, stride={list(a.stride())}]"
AttributeError: 'SymFloat' object has no attribute 'size'
ERROR:common:compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 472, in call_user_compiler
    assert callable(compiled_fn), "compiler_fn did not return callable"
AssertionError: compiler_fn did not return callable

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1679, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 386, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 432, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 475, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
cuda train pytorch_unet                       FAIL
Running torchbench.py resnet152...
cuda train resnet152                          PASS
Running torchbench.py resnet18...
cuda train resnet18                           PASS
Running torchbench.py resnet50...
cuda train resnet50                           PASS
Running torchbench.py resnet50_quantized_qat...
WARNING:common:fp64 golden ref were not generated for resnet50_quantized_qat
[2022-11-19 15:01:42,111] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,123] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,144] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,168] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,179] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,197] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,207] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,225] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,233] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,252] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,261] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,278] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,286] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,295] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,313] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,321] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,339] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,348] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,365] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,372] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,381] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,399] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,408] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,425] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,434] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,453] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,460] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,469] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,845] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,856] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,875] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,885] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,904] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,913] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,931] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,939] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,948] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,966] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,975] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:42,993] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,002] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,019] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,027] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,036] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,054] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,063] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,081] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,093] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,111] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,118] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,127] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,145] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,155] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,173] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,182] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,201] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,209] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,219] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,239] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,249] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,270] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,279] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,300] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,309] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,327] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,336] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,346] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,370] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,379] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,399] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,409] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,428] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,437] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,446] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,466] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,476] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,495] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,505] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,525] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,533] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,542] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,563] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,573] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,592] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,602] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,622] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,630] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,646] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,666] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,676] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,696] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,705] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,725] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,733] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,744] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,764] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,773] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,793] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,802] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,823] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,831] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,842] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,863] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,875] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,898] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,909] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,930] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,949] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,970] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,980] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:43,990] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,014] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,025] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,045] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,057] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,078] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,088] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,098] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,121] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,132] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,153] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,164] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,186] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,196] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,206] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,217] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,224] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-19 15:01:44,230] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
ERROR:common:expected size 64==0, stride 1==1 at dim=0
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/graph_module.py", line 660, in call_wrapped
    return self._wrapped_call(self, *args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/graph_module.py", line 279, in __call__
    raise e
  File "/scratch/ezyang/work/b/pytorch/torch/fx/graph_module.py", line 269, in __call__
    return super(self.cls, obj).__call__(*args, **kwargs)  # type: ignore[misc]
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "<eval_with_key>.8", line 4, in forward
    def forward(self, x : torch.Tensor) -> torch.Tensor:
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1537, in forward
    return compiled_function(
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1507, in compiled_function
    return aot_dispatcher_function(args)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 570, in g
    return f(*args)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1065, in compiled_function
    outs = CompiledFunction.apply(*no_dupe_args_with_synthetic_bases)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 977, in forward
    fw_outs = call_func_with_args(
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 595, in call_func_with_args
    out = normalize_as_list(f(args))
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 194, in run
    return model(new_inputs)
  File "/tmp/torchinductor_ezyang/ve/cvesfq5ns22rq4yxug6nwtl7ymfv4pyelrxs2rkglf353t4u2z2z.py", line 2668, in call
    assert_size_stride(buf19, (0, ), (1, ))
AssertionError: expected size 64==0, stride 1==1 at dim=0
TorchDynamo optimized model failed to run because of following error
cuda train resnet50_quantized_qat             FAIL
Running torchbench.py resnext50_32x4d...
cuda train resnext50_32x4d                    PASS
Running torchbench.py shufflenet_v2_x1_0...
[2022-11-19 15:03:02,282] torch._dynamo.optimizations.backends: [ERROR] aot_autograd error
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 53, in inner
    return fn(model, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 563, in aot_autograd
    return aot_module_simplified(subgraph.model, subgraph.example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1503, in aot_module_simplified
    aot_dispatcher_function = _create_aot_dispatcher_function(functional_call, joined_args, aot_config, fake_mode)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1191, in _create_aot_dispatcher_function
    aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 941, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args_with_views_handled)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 364, in fw_compiler
    return compile_fx_inner(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/debug_utils.py", line 473, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/debug.py", line 177, in inner
    return fn(*args, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 122, in compile_fx_inner
    graph.run(*example_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 140, in run
    return super().run(*args)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 321, in run_node
    result = super().run_node(n)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 282, in output
    assert all(
AssertionError: [TensorBox(StorageBox(
  MatrixMultiplyAdd(
    name=buf161,
    layout=FlexibleLayout('cuda', torch.float32, size=[2, 1000], stride=[1000, 1]),
    inputs=[InputBuffer(name='primals_170', layout=FixedLayout('cuda', torch.float32, size=[1000], stride=[1])), ComputedBuffer(name='buf160', layout=FlexibleLayout('cuda', torch.float32, size=[2, 1024], stride=[1024, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf159, i1 + 1024 * i0) / index_expr(49, torch.float32),
      ranges=[2, 1024],
      origins={primals_166, relu_36, unsqueeze_442, convolution_55, unsqueeze_440, sqrt_55, unsqueeze_446, mul_165, view_31, mul_167, reciprocal_55, unsqueeze_447, add_110, mean, add_111, primals_336, unsqueeze_443, primals_168, unsqueeze_441, primals_167, unsqueeze_445, unsqueeze_444, primals_337, sub_55, mul_166}
    )), ReinterpretView(
      StorageBox(
        InputBuffer(name='primals_169', layout=FixedLayout('cuda', torch.float32, size=[1000, 1024], stride=[1024, 1]))
      ),
      FixedLayout('cuda', torch.float32, size=[1024, 1000], stride=[1, 1024]),
      no origins?
    )],
    constant_args=(),
    kwargs={'beta': 1, 'alpha': 1},
    output_view=None,
    origins={addmm, primals_166, relu_36, unsqueeze_442, convolution_55, unsqueeze_440, sqrt_55, unsqueeze_446, mul_165, view_31, mul_167, reciprocal_55, unsqueeze_447, add_110, mean, primals_169, add_111, primals_336, unsqueeze_443, primals_168, unsqueeze_441, primals_167, permute_16, unsqueeze_445, unsqueeze_444, primals_337, sub_55, primals_170, mul_166}
  )
)), TensorBox(StorageBox(
  InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[24, 3, 3, 3], stride=[27, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_2', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_4', layout=FixedLayout('cuda', torch.float32, size=[24, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_5', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[58, 24, 1, 1], stride=[24, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_8', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_10', layout=FixedLayout('cuda', torch.float32, size=[58, 24, 1, 1], stride=[24, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_11', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_14', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_16', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_17', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_20', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_22', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_23', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_26', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_28', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_29', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_32', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_34', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_35', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_38', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_40', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_41', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_44', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_46', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_47', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_50', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_52', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_53', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_55', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_56', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_58', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_59', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_61', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_62', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_64', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_65', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_67', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_68', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_70', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_71', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_73', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_74', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_76', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_77', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_79', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_80', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_82', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_83', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_85', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_86', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_88', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_89', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_91', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_92', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_94', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_95', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_97', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_98', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_100', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_101', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_103', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_104', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_106', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_107', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_109', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_110', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_112', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_113', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_115', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_116', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_118', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_119', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_121', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_122', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_124', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_125', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_127', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_128', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_130', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_131', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_133', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_134', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_136', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_137', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_139', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_140', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_142', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_143', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_145', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_146', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_148', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_149', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_151', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_152', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_154', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_155', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_157', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_158', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_160', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_161', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_163', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_164', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_166', layout=FixedLayout('cuda', torch.float32, size=[1024, 464, 1, 1], stride=[464, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_167', layout=FixedLayout('cuda', torch.float32, size=[1024], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_171', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_172', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_174', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_175', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_177', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_178', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_180', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_181', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_183', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_184', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_186', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_187', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_189', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_190', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_192', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_193', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_195', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_196', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_198', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_199', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_201', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_202', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_204', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_205', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_207', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_208', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_210', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_211', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_213', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_214', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_216', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_217', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_219', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_220', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_222', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_223', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_225', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_226', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_228', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_229', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_231', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_232', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_234', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_235', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_237', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_238', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_240', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_241', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_243', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_244', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_246', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_247', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_249', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_250', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_252', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_253', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_255', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_256', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_258', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_259', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_261', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_262', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_264', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_265', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_267', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_268', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_270', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_271', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_273', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_274', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_276', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_277', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_279', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_280', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_282', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_283', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_285', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_286', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_288', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_289', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_291', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_292', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_294', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_295', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_297', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_298', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_300', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_301', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_303', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_304', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_306', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_307', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_309', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_310', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_312', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_313', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_315', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_316', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_318', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_319', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_321', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_322', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_324', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_325', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_327', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_328', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_330', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_331', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_333', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_334', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_336', layout=FixedLayout('cuda', torch.float32, size=[1024], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_337', layout=FixedLayout('cuda', torch.float32, size=[1024], stride=[1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_339', layout=FixedLayout('cuda', torch.float32, size=[s0, 3, s2, s2], stride=[3*s2**2, s2**2, s2, 1]))
)), TensorBox(StorageBox(
  Convolution(
    name=buf0,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 24, 112, 112]), stride=(301056, 12544, 112, 1)),
    inputs=[InputBuffer(name='primals_339', layout=FixedLayout('cuda', torch.float32, size=[s0, 3, s2, s2], stride=[3*s2**2, s2**2, s2, 1])), InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[24, 3, 3, 3], stride=[27, 9, 3, 1]))],
    constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={primals_339, convolution, primals_1}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf1', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 24, 112, 112]), stride=[301056, 12544, 112, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf0, i3 + 112 * i2 + 12544 * i1 + 301056 * i0) - load(primals_171, i1) * reciprocal(sqrt(load(primals_172, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_2, i1) + load(primals_3, i1)),
    ranges=torch.Size([2, 24, 112, 112]),
    origins={unsqueeze_4, sqrt, unsqueeze_7, add_1, primals_172, primals_1, primals_171, unsqueeze_1, relu, mul, unsqueeze, convolution, primals_2, add, unsqueeze_2, unsqueeze_3, mul_1, primals_339, mul_2, sub, reciprocal, unsqueeze_6, primals_3, unsqueeze_5}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[2, 24, 56, 56], stride=[75264, 3136, 56, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))))))))),
    ranges=[2, 24, 56, 56],
    origins={unsqueeze_4, sqrt, unsqueeze_7, add_1, primals_172, primals_1, primals_171, unsqueeze_1, relu, mul, unsqueeze, convolution, primals_2, add, unsqueeze_2, unsqueeze_3, mul_1, max_pool2d_with_indices, primals_339, mul_2, sub, reciprocal, unsqueeze_6, primals_3, unsqueeze_5}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf3', layout=FlexibleLayout('cuda', torch.int64, size=[2, 24, 56, 56], stride=[75264, 3136, 56, 1]), data=Pointwise(
    'cuda',
    torch.int64,
    where(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf)))))))), index_expr(113 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))))))), index_expr(112 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf)))))), index_expr(111 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))))), index_expr(1 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf)))), index_expr(2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))), index_expr(-1 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf)), index_expr(-111 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), index_expr(-112 + 2 * i3 + 224 * i2, torch.int64), index_expr(-113 + 2 * i3 + 224 * i2, torch.int64))))))))),
    ranges=[2, 24, 56, 56],
    origins={unsqueeze_4, sqrt, unsqueeze_7, add_1, primals_172, primals_1, primals_171, unsqueeze_1, relu, mul, unsqueeze, convolution, primals_2, add, unsqueeze_2, unsqueeze_3, mul_1, max_pool2d_with_indices, primals_339, mul_2, sub, reciprocal, unsqueeze_6, primals_3, unsqueeze_5}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf4,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 24, 28, 28]), stride=(18816, 784, 28, 1)),
    inputs=[ComputedBuffer(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[2, 24, 56, 56], stride=[75264, 3136, 56, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))))))))),
      ranges=[2, 24, 56, 56],
      origins={unsqueeze_4, sqrt, unsqueeze_7, add_1, primals_172, primals_1, primals_171, unsqueeze_1, relu, mul, unsqueeze, convolution, primals_2, add, unsqueeze_2, unsqueeze_3, mul_1, max_pool2d_with_indices, primals_339, mul_2, sub, reciprocal, unsqueeze_6, primals_3, unsqueeze_5}
    )), InputBuffer(name='primals_4', layout=FixedLayout('cuda', torch.float32, size=[24, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 24),
    kwargs={},
    output_view=None,
    origins={unsqueeze_4, sqrt, unsqueeze_7, add_1, primals_172, primals_1, primals_171, unsqueeze_1, relu, mul, unsqueeze, convolution, primals_2, add, unsqueeze_2, primals_4, unsqueeze_3, convolution_1, mul_1, max_pool2d_with_indices, primals_339, mul_2, sub, reciprocal, unsqueeze_6, primals_3, unsqueeze_5}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf5', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 24, 28, 28]), stride=[18816, 784, 28, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf4, i3 + 28 * i2 + 784 * i1 + 18816 * i0) - load(primals_174, i1) * reciprocal(sqrt(load(primals_175, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_5, i1) + load(primals_6, i1),
    ranges=torch.Size([2, 24, 28, 28]),
    origins={reciprocal_1, unsqueeze_10, sqrt, unsqueeze_9, primals_172, unsqueeze_8, primals_171, unsqueeze_14, unsqueeze_1, sub_1, primals_174, mul, unsqueeze, unsqueeze_2, add_3, primals_4, unsqueeze_3, mul_5, mul_1, max_pool2d_with_indices, primals_339, mul_2, sub, unsqueeze_15, unsqueeze_6, primals_3, primals_175, unsqueeze_5, primals_5, unsqueeze_4, unsqueeze_7, add_1, primals_1, primals_6, sqrt_1, relu, convolution, mul_3, primals_2, add, convolution_1, unsqueeze_13, unsqueeze_12, mul_4, add_2, reciprocal, unsqueeze_11}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf6,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=(45472, 784, 28, 1)),
    inputs=[ComputedBuffer(name='buf5', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 24, 28, 28]), stride=[18816, 784, 28, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf4, i3 + 28 * i2 + 784 * i1 + 18816 * i0) - load(primals_174, i1) * reciprocal(sqrt(load(primals_175, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_5, i1) + load(primals_6, i1),
      ranges=torch.Size([2, 24, 28, 28]),
      origins={reciprocal_1, unsqueeze_10, sqrt, unsqueeze_9, primals_172, unsqueeze_8, primals_171, unsqueeze_14, unsqueeze_1, sub_1, primals_174, mul, unsqueeze, unsqueeze_2, add_3, primals_4, unsqueeze_3, mul_5, mul_1, max_pool2d_with_indices, primals_339, mul_2, sub, unsqueeze_15, unsqueeze_6, primals_3, primals_175, unsqueeze_5, primals_5, unsqueeze_4, unsqueeze_7, add_1, primals_1, primals_6, sqrt_1, relu, convolution, mul_3, primals_2, add, convolution_1, unsqueeze_13, unsqueeze_12, mul_4, add_2, reciprocal, unsqueeze_11}
    )), InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[58, 24, 1, 1], stride=[24, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={reciprocal_1, unsqueeze_10, sqrt, unsqueeze_9, primals_172, unsqueeze_8, primals_171, unsqueeze_14, unsqueeze_1, sub_1, primals_174, mul, unsqueeze, unsqueeze_2, add_3, primals_4, unsqueeze_3, mul_5, mul_1, convolution_2, max_pool2d_with_indices, primals_7, primals_339, mul_2, sub, unsqueeze_15, unsqueeze_6, primals_3, primals_175, unsqueeze_5, primals_5, unsqueeze_4, unsqueeze_7, add_1, primals_1, primals_6, sqrt_1, relu, convolution, mul_3, primals_2, add, convolution_1, unsqueeze_13, unsqueeze_12, mul_4, add_2, reciprocal, unsqueeze_11}
  )
)), TensorBox(StorageBox(
  Convolution(
    name=buf8,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 56, 56]), stride=(181888, 3136, 56, 1)),
    inputs=[ComputedBuffer(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[2, 24, 56, 56], stride=[75264, 3136, 56, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))))))))),
      ranges=[2, 24, 56, 56],
      origins={unsqueeze_4, sqrt, unsqueeze_7, add_1, primals_172, primals_1, primals_171, unsqueeze_1, relu, mul, unsqueeze, convolution, primals_2, add, unsqueeze_2, unsqueeze_3, mul_1, max_pool2d_with_indices, primals_339, mul_2, sub, reciprocal, unsqueeze_6, primals_3, unsqueeze_5}
    )), InputBuffer(name='primals_10', layout=FixedLayout('cuda', torch.float32, size=[58, 24, 1, 1], stride=[24, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={unsqueeze_4, sqrt, unsqueeze_7, add_1, primals_10, primals_172, primals_1, primals_171, unsqueeze_1, relu, mul, unsqueeze, convolution, primals_2, add, unsqueeze_2, unsqueeze_3, mul_1, max_pool2d_with_indices, convolution_3, primals_339, mul_2, sub, reciprocal, unsqueeze_6, primals_3, unsqueeze_5}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf9', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 56, 56]), stride=[181888, 3136, 56, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf8, i3 + 56 * i2 + 3136 * i1 + 181888 * i0) - load(primals_180, i1) * reciprocal(sqrt(load(primals_181, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_11, i1) + load(primals_12, i1)),
    ranges=torch.Size([2, 58, 56, 56]),
    origins={sqrt_3, sqrt, primals_172, primals_171, unsqueeze_1, mul, unsqueeze, unsqueeze_24, unsqueeze_25, unsqueeze_29, add_6, mul_10, unsqueeze_2, unsqueeze_27, unsqueeze_3, mul_1, unsqueeze_26, unsqueeze_28, max_pool2d_with_indices, convolution_3, primals_339, mul_2, unsqueeze_30, sub_3, sub, primals_12, unsqueeze_6, primals_3, unsqueeze_5, unsqueeze_4, primals_181, primals_11, unsqueeze_7, add_7, add_1, primals_180, mul_11, primals_10, primals_1, relu_2, relu, mul_9, unsqueeze_31, convolution, primals_2, reciprocal_3, add, reciprocal}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf10,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=(45472, 784, 28, 1)),
    inputs=[ComputedBuffer(name='buf9', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 56, 56]), stride=[181888, 3136, 56, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf8, i3 + 56 * i2 + 3136 * i1 + 181888 * i0) - load(primals_180, i1) * reciprocal(sqrt(load(primals_181, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_11, i1) + load(primals_12, i1)),
      ranges=torch.Size([2, 58, 56, 56]),
      origins={sqrt_3, sqrt, primals_172, primals_171, unsqueeze_1, mul, unsqueeze, unsqueeze_24, unsqueeze_25, unsqueeze_29, add_6, mul_10, unsqueeze_2, unsqueeze_27, unsqueeze_3, mul_1, unsqueeze_26, unsqueeze_28, max_pool2d_with_indices, convolution_3, primals_339, mul_2, unsqueeze_30, sub_3, sub, primals_12, unsqueeze_6, primals_3, unsqueeze_5, unsqueeze_4, primals_181, primals_11, unsqueeze_7, add_7, add_1, primals_180, mul_11, primals_10, primals_1, relu_2, relu, mul_9, unsqueeze_31, convolution, primals_2, reciprocal_3, add, reciprocal}
    )), InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 58),
    kwargs={},
    output_view=None,
    origins={sqrt_3, sqrt, primals_172, convolution_4, primals_171, unsqueeze_1, mul, unsqueeze, unsqueeze_24, unsqueeze_25, unsqueeze_29, add_6, mul_10, unsqueeze_2, unsqueeze_27, unsqueeze_3, mul_1, unsqueeze_26, unsqueeze_28, max_pool2d_with_indices, convolution_3, primals_339, mul_2, unsqueeze_30, sub_3, sub, primals_12, primals_13, unsqueeze_6, primals_3, unsqueeze_5, unsqueeze_4, primals_181, primals_11, unsqueeze_7, add_7, add_1, primals_180, mul_11, primals_10, primals_1, relu_2, relu, mul_9, unsqueeze_31, convolution, primals_2, reciprocal_3, add, reciprocal}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf11', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf10, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_183, i1) * reciprocal(sqrt(load(primals_184, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_14, i1) + load(primals_15, i1),
    ranges=torch.Size([2, 58, 28, 28]),
    origins={add_9, primals_172, primals_171, mul_14, mul, add_6, primals_15, primals_13, convolution_3, primals_339, primals_12, primals_14, primals_11, primals_10, primals_1, mul_9, unsqueeze_35, convolution, primals_2, reciprocal_3, add, reciprocal, mul_12, sqrt_3, sqrt, add_8, convolution_4, unsqueeze_1, unsqueeze_24, unsqueeze, unsqueeze_25, unsqueeze_29, mul_10, unsqueeze_2, unsqueeze_27, unsqueeze_3, unsqueeze_34, mul_1, unsqueeze_26, unsqueeze_28, max_pool2d_with_indices, primals_184, unsqueeze_30, mul_2, sub_3, sub, unsqueeze_6, primals_3, unsqueeze_5, unsqueeze_4, primals_181, unsqueeze_7, add_7, add_1, primals_180, mul_11, primals_183, relu_2, unsqueeze_39, relu, unsqueeze_31, unsqueeze_37, sqrt_4, reciprocal_4, unsqueeze_36, sub_4, mul_13, unsqueeze_33, unsqueeze_38, unsqueeze_32}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf12,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=(45472, 784, 28, 1)),
    inputs=[ComputedBuffer(name='buf11', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf10, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_183, i1) * reciprocal(sqrt(load(primals_184, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_14, i1) + load(primals_15, i1),
      ranges=torch.Size([2, 58, 28, 28]),
      origins={add_9, primals_172, primals_171, mul_14, mul, add_6, primals_15, primals_13, convolution_3, primals_339, primals_12, primals_14, primals_11, primals_10, primals_1, mul_9, unsqueeze_35, convolution, primals_2, reciprocal_3, add, reciprocal, mul_12, sqrt_3, sqrt, add_8, convolution_4, unsqueeze_1, unsqueeze_24, unsqueeze, unsqueeze_25, unsqueeze_29, mul_10, unsqueeze_2, unsqueeze_27, unsqueeze_3, unsqueeze_34, mul_1, unsqueeze_26, unsqueeze_28, max_pool2d_with_indices, primals_184, unsqueeze_30, mul_2, sub_3, sub, unsqueeze_6, primals_3, unsqueeze_5, unsqueeze_4, primals_181, unsqueeze_7, add_7, add_1, primals_180, mul_11, primals_183, relu_2, unsqueeze_39, relu, unsqueeze_31, unsqueeze_37, sqrt_4, reciprocal_4, unsqueeze_36, sub_4, mul_13, unsqueeze_33, unsqueeze_38, unsqueeze_32}
    )), InputBuffer(name='primals_16', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={add_9, primals_172, primals_171, mul_14, mul, primals_16, add_6, primals_15, primals_13, convolution_5, convolution_3, primals_339, primals_12, primals_14, primals_11, primals_10, primals_1, mul_9, unsqueeze_35, convolution, primals_2, reciprocal_3, add, reciprocal, mul_12, sqrt_3, sqrt, add_8, convolution_4, unsqueeze_1, unsqueeze_24, unsqueeze, unsqueeze_25, unsqueeze_29, mul_10, unsqueeze_2, unsqueeze_27, unsqueeze_3, unsqueeze_34, mul_1, unsqueeze_26, unsqueeze_28, max_pool2d_with_indices, primals_184, unsqueeze_30, mul_2, sub_3, sub, unsqueeze_6, primals_3, unsqueeze_5, unsqueeze_4, primals_181, unsqueeze_7, add_7, add_1, primals_180, mul_11, primals_183, relu_2, unsqueeze_39, relu, unsqueeze_31, unsqueeze_37, sqrt_4, reciprocal_4, unsqueeze_36, sub_4, mul_13, unsqueeze_33, unsqueeze_38, unsqueeze_32}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf15', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 28, 28], stride=[45472, 784, 28, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf14, i3 + 28 * i2 + 784 * ModularIndexing(58 + i1, 2, 58) + 45472 * ModularIndexing(i1, 1, 2) + 90944 * i0),
    ranges=[2, 58, 28, 28],
    origins={getitem_3}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf16,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=(45472, 784, 28, 1)),
    inputs=[ComputedBuffer(name='buf15', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 28, 28], stride=[45472, 784, 28, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf14, i3 + 28 * i2 + 784 * ModularIndexing(58 + i1, 2, 58) + 45472 * ModularIndexing(i1, 1, 2) + 90944 * i0),
      ranges=[2, 58, 28, 28],
      origins={getitem_3}
    )), InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={primals_19, convolution_6, getitem_3}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf17', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf16, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_189, i1) * reciprocal(sqrt(load(primals_190, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_20, i1) + load(primals_21, i1)),
    ranges=torch.Size([2, 58, 28, 28]),
    origins={unsqueeze_55, sub_6, unsqueeze_54, unsqueeze_53, add_12, primals_20, primals_190, unsqueeze_52, unsqueeze_51, primals_19, primals_189, sqrt_6, mul_19, unsqueeze_50, unsqueeze_48, reciprocal_6, getitem_3, unsqueeze_49, relu_4, add_13, mul_18, convolution_6, mul_20, primals_21}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf18,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=(45472, 784, 28, 1)),
    inputs=[ComputedBuffer(name='buf17', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf16, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_189, i1) * reciprocal(sqrt(load(primals_190, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_20, i1) + load(primals_21, i1)),
      ranges=torch.Size([2, 58, 28, 28]),
      origins={unsqueeze_55, sub_6, unsqueeze_54, unsqueeze_53, add_12, primals_20, primals_190, unsqueeze_52, unsqueeze_51, primals_19, primals_189, sqrt_6, mul_19, unsqueeze_50, unsqueeze_48, reciprocal_6, getitem_3, unsqueeze_49, relu_4, add_13, mul_18, convolution_6, mul_20, primals_21}
    )), InputBuffer(name='primals_22', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 58),
    kwargs={},
    output_view=None,
    origins={unsqueeze_55, sub_6, unsqueeze_54, unsqueeze_53, add_12, primals_20, primals_190, unsqueeze_52, unsqueeze_51, primals_19, primals_189, sqrt_6, mul_19, unsqueeze_50, unsqueeze_48, reciprocal_6, getitem_3, unsqueeze_49, convolution_7, relu_4, primals_22, add_13, mul_18, convolution_6, mul_20, primals_21}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf19', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf18, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_192, i1) * reciprocal(sqrt(load(primals_193, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_23, i1) + load(primals_24, i1),
    ranges=torch.Size([2, 58, 28, 28]),
    origins={primals_192, unsqueeze_60, unsqueeze_62, sub_7, add_12, primals_20, primals_190, unsqueeze_57, primals_19, primals_189, sqrt_6, add_14, reciprocal_6, add_15, getitem_3, mul_23, unsqueeze_63, add_13, mul_20, unsqueeze_55, sub_6, reciprocal_7, unsqueeze_54, unsqueeze_53, unsqueeze_52, unsqueeze_51, mul_19, unsqueeze_50, unsqueeze_48, sqrt_7, unsqueeze_49, unsqueeze_56, primals_24, convolution_7, relu_4, unsqueeze_61, primals_23, mul_22, primals_22, unsqueeze_59, mul_21, primals_193, mul_18, convolution_6, unsqueeze_58, primals_21}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf20,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=(45472, 784, 28, 1)),
    inputs=[ComputedBuffer(name='buf19', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf18, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_192, i1) * reciprocal(sqrt(load(primals_193, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_23, i1) + load(primals_24, i1),
      ranges=torch.Size([2, 58, 28, 28]),
      origins={primals_192, unsqueeze_60, unsqueeze_62, sub_7, add_12, primals_20, primals_190, unsqueeze_57, primals_19, primals_189, sqrt_6, add_14, reciprocal_6, add_15, getitem_3, mul_23, unsqueeze_63, add_13, mul_20, unsqueeze_55, sub_6, reciprocal_7, unsqueeze_54, unsqueeze_53, unsqueeze_52, unsqueeze_51, mul_19, unsqueeze_50, unsqueeze_48, sqrt_7, unsqueeze_49, unsqueeze_56, primals_24, convolution_7, relu_4, unsqueeze_61, primals_23, mul_22, primals_22, unsqueeze_59, mul_21, primals_193, mul_18, convolution_6, unsqueeze_58, primals_21}
    )), InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={primals_192, unsqueeze_60, unsqueeze_62, sub_7, add_12, primals_20, primals_190, unsqueeze_57, primals_19, primals_189, sqrt_6, add_14, convolution_8, reciprocal_6, add_15, getitem_3, mul_23, unsqueeze_63, add_13, mul_20, unsqueeze_55, sub_6, reciprocal_7, unsqueeze_54, unsqueeze_53, unsqueeze_52, unsqueeze_51, mul_19, unsqueeze_50, unsqueeze_48, sqrt_7, unsqueeze_49, unsqueeze_56, primals_25, primals_24, convolution_7, relu_4, unsqueeze_61, primals_23, mul_22, primals_22, unsqueeze_59, mul_21, primals_193, mul_18, convolution_6, unsqueeze_58, primals_21}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf24', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 28, 28], stride=[45472, 784, 28, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf23, i3 + 28 * i2 + 784 * ModularIndexing(58 + i1, 2, 58) + 45472 * ModularIndexing(i1, 1, 2) + 90944 * i0),
    ranges=[2, 58, 28, 28],
    origins={getitem_5}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf25,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=(45472, 784, 28, 1)),
    inputs=[ComputedBuffer(name='buf24', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 28, 28], stride=[45472, 784, 28, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf23, i3 + 28 * i2 + 784 * ModularIndexing(58 + i1, 2, 58) + 45472 * ModularIndexing(i1, 1, 2) + 90944 * i0),
      ranges=[2, 58, 28, 28],
      origins={getitem_5}
    )), InputBuffer(name='primals_28', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={getitem_5, convolution_9, primals_28}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf26', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf25, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_198, i1) * reciprocal(sqrt(load(primals_199, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_29, i1) + load(primals_30, i1)),
    ranges=torch.Size([2, 58, 28, 28]),
    origins={mul_29, unsqueeze_72, unsqueeze_78, unsqueeze_79, add_19, primals_29, convolution_9, primals_28, primals_198, reciprocal_9, primals_199, mul_27, unsqueeze_74, primals_30, relu_6, unsqueeze_77, mul_28, unsqueeze_73, unsqueeze_75, getitem_5, unsqueeze_76, sqrt_9, sub_9, add_18}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf27,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=(45472, 784, 28, 1)),
    inputs=[ComputedBuffer(name='buf26', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf25, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_198, i1) * reciprocal(sqrt(load(primals_199, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_29, i1) + load(primals_30, i1)),
      ranges=torch.Size([2, 58, 28, 28]),
      origins={mul_29, unsqueeze_72, unsqueeze_78, unsqueeze_79, add_19, primals_29, convolution_9, primals_28, primals_198, reciprocal_9, primals_199, mul_27, unsqueeze_74, primals_30, relu_6, unsqueeze_77, mul_28, unsqueeze_73, unsqueeze_75, getitem_5, unsqueeze_76, sqrt_9, sub_9, add_18}
    )), InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 58),
    kwargs={},
    output_view=None,
    origins={mul_29, unsqueeze_72, unsqueeze_78, unsqueeze_79, convolution_10, add_19, primals_31, primals_29, convolution_9, primals_28, primals_198, reciprocal_9, primals_199, mul_27, unsqueeze_74, primals_30, relu_6, unsqueeze_77, mul_28, unsqueeze_73, unsqueeze_75, getitem_5, unsqueeze_76, sqrt_9, sub_9, add_18}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf28', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf27, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_201, i1) * reciprocal(sqrt(load(primals_202, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_32, i1) + load(primals_33, i1),
    ranges=torch.Size([2, 58, 28, 28]),
    origins={unsqueeze_72, mul_30, convolution_10, add_19, add_21, mul_32, convolution_9, unsqueeze_87, sub_10, unsqueeze_86, unsqueeze_85, unsqueeze_84, mul_31, unsqueeze_74, add_20, unsqueeze_77, unsqueeze_83, mul_28, unsqueeze_75, getitem_5, unsqueeze_76, sqrt_9, sub_9, add_18, mul_29, unsqueeze_78, unsqueeze_79, primals_202, unsqueeze_81, primals_201, primals_29, primals_28, primals_198, reciprocal_9, primals_199, mul_27, primals_30, sqrt_10, relu_6, primals_32, unsqueeze_73, reciprocal_10, primals_31, primals_33, unsqueeze_80, unsqueeze_82}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf29,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=(45472, 784, 28, 1)),
    inputs=[ComputedBuffer(name='buf28', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf27, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_201, i1) * reciprocal(sqrt(load(primals_202, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_32, i1) + load(primals_33, i1),
      ranges=torch.Size([2, 58, 28, 28]),
      origins={unsqueeze_72, mul_30, convolution_10, add_19, add_21, mul_32, convolution_9, unsqueeze_87, sub_10, unsqueeze_86, unsqueeze_85, unsqueeze_84, mul_31, unsqueeze_74, add_20, unsqueeze_77, unsqueeze_83, mul_28, unsqueeze_75, getitem_5, unsqueeze_76, sqrt_9, sub_9, add_18, mul_29, unsqueeze_78, unsqueeze_79, primals_202, unsqueeze_81, primals_201, primals_29, primals_28, primals_198, reciprocal_9, primals_199, mul_27, primals_30, sqrt_10, relu_6, primals_32, unsqueeze_73, reciprocal_10, primals_31, primals_33, unsqueeze_80, unsqueeze_82}
    )), InputBuffer(name='primals_34', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={unsqueeze_72, mul_30, convolution_10, add_19, add_21, mul_32, convolution_9, unsqueeze_87, sub_10, unsqueeze_86, unsqueeze_85, unsqueeze_84, mul_31, unsqueeze_74, add_20, unsqueeze_77, unsqueeze_83, mul_28, unsqueeze_75, getitem_5, unsqueeze_76, sqrt_9, sub_9, add_18, mul_29, unsqueeze_78, unsqueeze_79, primals_202, unsqueeze_81, primals_201, primals_29, primals_28, convolution_11, primals_198, reciprocal_9, primals_199, mul_27, primals_30, sqrt_10, relu_6, primals_32, unsqueeze_73, reciprocal_10, primals_31, primals_33, unsqueeze_80, primals_34, unsqueeze_82}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf33', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 28, 28], stride=[45472, 784, 28, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf32, i3 + 28 * i2 + 784 * ModularIndexing(58 + i1, 2, 58) + 45472 * ModularIndexing(i1, 1, 2) + 90944 * i0),
    ranges=[2, 58, 28, 28],
    origins={getitem_7}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf34,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=(45472, 784, 28, 1)),
    inputs=[ComputedBuffer(name='buf33', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 28, 28], stride=[45472, 784, 28, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf32, i3 + 28 * i2 + 784 * ModularIndexing(58 + i1, 2, 58) + 45472 * ModularIndexing(i1, 1, 2) + 90944 * i0),
      ranges=[2, 58, 28, 28],
      origins={getitem_7}
    )), InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={convolution_12, primals_37, getitem_7}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf35', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf34, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_207, i1) * reciprocal(sqrt(load(primals_208, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_38, i1) + load(primals_39, i1)),
    ranges=torch.Size([2, 58, 28, 28]),
    origins={unsqueeze_100, unsqueeze_99, mul_37, primals_38, unsqueeze_101, getitem_7, relu_8, primals_37, unsqueeze_102, reciprocal_12, unsqueeze_98, unsqueeze_96, mul_38, primals_39, primals_208, add_24, convolution_12, primals_207, unsqueeze_103, add_25, sqrt_12, sub_12, mul_36, unsqueeze_97}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf36,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=(45472, 784, 28, 1)),
    inputs=[ComputedBuffer(name='buf35', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf34, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_207, i1) * reciprocal(sqrt(load(primals_208, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_38, i1) + load(primals_39, i1)),
      ranges=torch.Size([2, 58, 28, 28]),
      origins={unsqueeze_100, unsqueeze_99, mul_37, primals_38, unsqueeze_101, getitem_7, relu_8, primals_37, unsqueeze_102, reciprocal_12, unsqueeze_98, unsqueeze_96, mul_38, primals_39, primals_208, add_24, convolution_12, primals_207, unsqueeze_103, add_25, sqrt_12, sub_12, mul_36, unsqueeze_97}
    )), InputBuffer(name='primals_40', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 58),
    kwargs={},
    output_view=None,
    origins={unsqueeze_100, unsqueeze_99, mul_37, primals_38, unsqueeze_101, getitem_7, relu_8, primals_37, unsqueeze_102, reciprocal_12, unsqueeze_98, convolution_13, unsqueeze_96, primals_208, primals_39, mul_38, add_24, convolution_12, primals_207, unsqueeze_103, add_25, sqrt_12, sub_12, mul_36, primals_40, unsqueeze_97}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf37', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf36, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_210, i1) * reciprocal(sqrt(load(primals_211, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_41, i1) + load(primals_42, i1),
    ranges=torch.Size([2, 58, 28, 28]),
    origins={primals_211, unsqueeze_100, primals_210, primals_38, sqrt_13, getitem_7, relu_8, primals_37, mul_39, convolution_13, unsqueeze_96, primals_39, unsqueeze_108, convolution_12, unsqueeze_107, unsqueeze_106, unsqueeze_105, primals_42, mul_40, unsqueeze_109, primals_41, unsqueeze_110, unsqueeze_104, primals_40, add_27, add_26, unsqueeze_99, mul_41, mul_37, unsqueeze_111, unsqueeze_101, unsqueeze_102, reciprocal_12, unsqueeze_98, reciprocal_13, sub_13, primals_208, mul_38, add_24, primals_207, unsqueeze_103, add_25, sqrt_12, sub_12, mul_36, unsqueeze_97}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf38,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=(45472, 784, 28, 1)),
    inputs=[ComputedBuffer(name='buf37', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf36, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_210, i1) * reciprocal(sqrt(load(primals_211, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_41, i1) + load(primals_42, i1),
      ranges=torch.Size([2, 58, 28, 28]),
      origins={primals_211, unsqueeze_100, primals_210, primals_38, sqrt_13, getitem_7, relu_8, primals_37, mul_39, convolution_13, unsqueeze_96, primals_39, unsqueeze_108, convolution_12, unsqueeze_107, unsqueeze_106, unsqueeze_105, primals_42, mul_40, unsqueeze_109, primals_41, unsqueeze_110, unsqueeze_104, primals_40, add_27, add_26, unsqueeze_99, mul_41, mul_37, unsqueeze_111, unsqueeze_101, unsqueeze_102, reciprocal_12, unsqueeze_98, reciprocal_13, sub_13, primals_208, mul_38, add_24, primals_207, unsqueeze_103, add_25, sqrt_12, sub_12, mul_36, unsqueeze_97}
    )), InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={primals_211, unsqueeze_100, primals_210, primals_38, sqrt_13, getitem_7, relu_8, primals_37, mul_39, convolution_13, unsqueeze_96, primals_39, unsqueeze_108, convolution_12, unsqueeze_107, unsqueeze_106, primals_43, unsqueeze_105, primals_42, mul_40, unsqueeze_109, primals_41, unsqueeze_110, unsqueeze_104, primals_40, add_27, add_26, unsqueeze_99, mul_41, mul_37, unsqueeze_111, unsqueeze_101, unsqueeze_102, reciprocal_12, convolution_14, unsqueeze_98, reciprocal_13, sub_13, primals_208, mul_38, add_24, primals_207, unsqueeze_103, add_25, sqrt_12, sub_12, mul_36, unsqueeze_97}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf42', layout=FixedLayout('cuda', torch.float32, size=(2, 116, 28, 28), stride=[90944, 784, 28, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf41, i3 + 28 * i2 + 784 * ModularIndexing(i1, 2, 58) + 45472 * ModularIndexing(i1, 1, 2) + 90944 * i0),
    ranges=(2, 116, 28, 28),
    origins={view_7}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf43,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf42', layout=FixedLayout('cuda', torch.float32, size=(2, 116, 28, 28), stride=[90944, 784, 28, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf41, i3 + 28 * i2 + 784 * ModularIndexing(i1, 2, 58) + 45472 * ModularIndexing(i1, 1, 2) + 90944 * i0),
      ranges=(2, 116, 28, 28),
      origins={view_7}
    )), InputBuffer(name='primals_46', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 116),
    kwargs={},
    output_view=None,
    origins={primals_46, view_7, convolution_15}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf44', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf43, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_216, i1) * reciprocal(sqrt(load(primals_217, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_47, i1) + load(primals_48, i1),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={mul_46, unsqueeze_123, sqrt_15, unsqueeze_121, convolution_15, mul_45, primals_217, primals_216, unsqueeze_120, add_30, sub_15, unsqueeze_124, unsqueeze_122, primals_48, reciprocal_15, add_31, view_7, primals_47, mul_47, unsqueeze_127, primals_46, unsqueeze_126, unsqueeze_125}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf45,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf44', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf43, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_216, i1) * reciprocal(sqrt(load(primals_217, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_47, i1) + load(primals_48, i1),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={mul_46, unsqueeze_123, sqrt_15, unsqueeze_121, convolution_15, mul_45, primals_217, primals_216, unsqueeze_120, add_30, sub_15, unsqueeze_124, unsqueeze_122, primals_48, reciprocal_15, add_31, view_7, primals_47, mul_47, unsqueeze_127, primals_46, unsqueeze_126, unsqueeze_125}
    )), InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={mul_46, unsqueeze_123, sqrt_15, unsqueeze_121, convolution_15, convolution_16, mul_45, primals_217, primals_216, unsqueeze_120, add_30, sub_15, unsqueeze_124, primals_49, unsqueeze_122, primals_48, reciprocal_15, add_31, view_7, primals_47, mul_47, unsqueeze_127, primals_46, unsqueeze_126, unsqueeze_125}
  )
)), TensorBox(StorageBox(
  Convolution(
    name=buf47,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 28, 28]), stride=(90944, 784, 28, 1)),
    inputs=[ComputedBuffer(name='buf42', layout=FixedLayout('cuda', torch.float32, size=(2, 116, 28, 28), stride=[90944, 784, 28, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf41, i3 + 28 * i2 + 784 * ModularIndexing(i1, 2, 58) + 45472 * ModularIndexing(i1, 1, 2) + 90944 * i0),
      ranges=(2, 116, 28, 28),
      origins={view_7}
    )), InputBuffer(name='primals_52', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={convolution_17, view_7, primals_52}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf48', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 28, 28]), stride=[90944, 784, 28, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf47, i3 + 28 * i2 + 784 * i1 + 90944 * i0) - load(primals_222, i1) * reciprocal(sqrt(load(primals_223, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_53, i1) + load(primals_54, i1)),
    ranges=torch.Size([2, 116, 28, 28]),
    origins={unsqueeze_137, mul_51, add_35, mul_53, primals_52, primals_223, convolution_17, sqrt_17, relu_11, primals_53, unsqueeze_136, unsqueeze_143, unsqueeze_141, mul_52, primals_222, unsqueeze_139, view_7, unsqueeze_138, add_34, unsqueeze_140, unsqueeze_142, reciprocal_17, primals_54, sub_17}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf49,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf48', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 28, 28]), stride=[90944, 784, 28, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf47, i3 + 28 * i2 + 784 * i1 + 90944 * i0) - load(primals_222, i1) * reciprocal(sqrt(load(primals_223, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_53, i1) + load(primals_54, i1)),
      ranges=torch.Size([2, 116, 28, 28]),
      origins={unsqueeze_137, mul_51, add_35, mul_53, primals_52, primals_223, convolution_17, sqrt_17, relu_11, primals_53, unsqueeze_136, unsqueeze_143, unsqueeze_141, mul_52, primals_222, unsqueeze_139, view_7, unsqueeze_138, add_34, unsqueeze_140, unsqueeze_142, reciprocal_17, primals_54, sub_17}
    )), InputBuffer(name='primals_55', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 116),
    kwargs={},
    output_view=None,
    origins={unsqueeze_137, convolution_18, mul_51, add_35, mul_53, primals_52, primals_223, convolution_17, sqrt_17, relu_11, primals_53, unsqueeze_136, unsqueeze_143, unsqueeze_141, mul_52, primals_222, unsqueeze_139, view_7, primals_55, unsqueeze_138, add_34, unsqueeze_140, unsqueeze_142, reciprocal_17, primals_54, sub_17}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf50', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf49, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_225, i1) * reciprocal(sqrt(load(primals_226, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_56, i1) + load(primals_57, i1),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={mul_56, unsqueeze_151, unsqueeze_137, primals_226, unsqueeze_150, unsqueeze_149, unsqueeze_148, reciprocal_18, mul_55, mul_51, add_35, sub_18, mul_53, unsqueeze_147, relu_11, unsqueeze_146, sqrt_18, unsqueeze_144, unsqueeze_143, primals_57, primals_56, unsqueeze_145, view_7, primals_55, add_34, reciprocal_17, add_36, convolution_18, primals_52, primals_223, convolution_17, sqrt_17, primals_53, mul_54, unsqueeze_136, primals_225, unsqueeze_141, mul_52, primals_222, unsqueeze_139, unsqueeze_138, unsqueeze_140, unsqueeze_142, add_37, primals_54, sub_17}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf51,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf50', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf49, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_225, i1) * reciprocal(sqrt(load(primals_226, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_56, i1) + load(primals_57, i1),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={mul_56, unsqueeze_151, unsqueeze_137, primals_226, unsqueeze_150, unsqueeze_149, unsqueeze_148, reciprocal_18, mul_55, mul_51, add_35, sub_18, mul_53, unsqueeze_147, relu_11, unsqueeze_146, sqrt_18, unsqueeze_144, unsqueeze_143, primals_57, primals_56, unsqueeze_145, view_7, primals_55, add_34, reciprocal_17, add_36, convolution_18, primals_52, primals_223, convolution_17, sqrt_17, primals_53, mul_54, unsqueeze_136, primals_225, unsqueeze_141, mul_52, primals_222, unsqueeze_139, unsqueeze_138, unsqueeze_140, unsqueeze_142, add_37, primals_54, sub_17}
    )), InputBuffer(name='primals_58', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={mul_56, unsqueeze_151, unsqueeze_137, primals_226, unsqueeze_150, unsqueeze_149, unsqueeze_148, reciprocal_18, mul_55, mul_51, add_35, sub_18, mul_53, unsqueeze_147, relu_11, unsqueeze_146, sqrt_18, unsqueeze_144, primals_58, unsqueeze_143, primals_57, primals_56, unsqueeze_145, view_7, primals_55, add_34, reciprocal_17, add_36, convolution_18, primals_52, primals_223, convolution_17, sqrt_17, primals_53, mul_54, unsqueeze_136, primals_225, unsqueeze_141, mul_52, primals_222, unsqueeze_139, unsqueeze_138, unsqueeze_140, convolution_19, unsqueeze_142, add_37, primals_54, sub_17}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf54', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf53, i3 + 14 * i2 + 196 * ModularIndexing(116 + i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
    ranges=[2, 116, 14, 14],
    origins={getitem_9}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf55,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf54', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf53, i3 + 14 * i2 + 196 * ModularIndexing(116 + i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
      ranges=[2, 116, 14, 14],
      origins={getitem_9}
    )), InputBuffer(name='primals_61', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={primals_61, getitem_9, convolution_20}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf56', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf55, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_231, i1) * reciprocal(sqrt(load(primals_232, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_62, i1) + load(primals_63, i1)),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={unsqueeze_166, unsqueeze_165, mul_60, convolution_20, unsqueeze_164, mul_61, sub_20, mul_62, primals_61, add_41, primals_62, unsqueeze_163, primals_232, relu_13, primals_231, unsqueeze_161, getitem_9, add_40, unsqueeze_160, unsqueeze_162, sqrt_20, reciprocal_20, primals_63, unsqueeze_167}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf57,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf56', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf55, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_231, i1) * reciprocal(sqrt(load(primals_232, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_62, i1) + load(primals_63, i1)),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={unsqueeze_166, unsqueeze_165, mul_60, convolution_20, unsqueeze_164, mul_61, sub_20, mul_62, primals_61, add_41, primals_62, unsqueeze_163, primals_232, relu_13, primals_231, unsqueeze_161, getitem_9, add_40, unsqueeze_160, unsqueeze_162, sqrt_20, reciprocal_20, primals_63, unsqueeze_167}
    )), InputBuffer(name='primals_64', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
    kwargs={},
    output_view=None,
    origins={unsqueeze_166, unsqueeze_165, mul_60, convolution_20, unsqueeze_164, mul_61, sub_20, mul_62, primals_61, add_41, primals_62, convolution_21, unsqueeze_163, primals_232, relu_13, primals_64, primals_231, unsqueeze_161, getitem_9, add_40, unsqueeze_160, unsqueeze_162, sqrt_20, reciprocal_20, primals_63, unsqueeze_167}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf58', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf57, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_234, i1) * reciprocal(sqrt(load(primals_235, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_65, i1) + load(primals_66, i1),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={unsqueeze_166, unsqueeze_165, mul_60, unsqueeze_164, add_43, mul_61, sub_20, mul_62, primals_61, add_41, primals_62, mul_65, unsqueeze_163, primals_232, relu_13, primals_231, sqrt_21, add_40, primals_234, reciprocal_21, sqrt_20, primals_63, primals_66, primals_235, convolution_20, primals_65, unsqueeze_168, unsqueeze_169, unsqueeze_174, convolution_21, unsqueeze_172, primals_64, add_42, unsqueeze_161, mul_64, getitem_9, unsqueeze_171, unsqueeze_173, unsqueeze_160, sub_21, unsqueeze_162, unsqueeze_175, reciprocal_20, unsqueeze_170, mul_63, unsqueeze_167}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf59,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf58', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf57, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_234, i1) * reciprocal(sqrt(load(primals_235, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_65, i1) + load(primals_66, i1),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={unsqueeze_166, unsqueeze_165, mul_60, unsqueeze_164, add_43, mul_61, sub_20, mul_62, primals_61, add_41, primals_62, mul_65, unsqueeze_163, primals_232, relu_13, primals_231, sqrt_21, add_40, primals_234, reciprocal_21, sqrt_20, primals_63, primals_66, primals_235, convolution_20, primals_65, unsqueeze_168, unsqueeze_169, unsqueeze_174, convolution_21, unsqueeze_172, primals_64, add_42, unsqueeze_161, mul_64, getitem_9, unsqueeze_171, unsqueeze_173, unsqueeze_160, sub_21, unsqueeze_162, unsqueeze_175, reciprocal_20, unsqueeze_170, mul_63, unsqueeze_167}
    )), InputBuffer(name='primals_67', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={unsqueeze_166, unsqueeze_165, mul_60, unsqueeze_164, add_43, mul_61, sub_20, mul_62, primals_61, add_41, primals_62, mul_65, unsqueeze_163, primals_232, relu_13, primals_231, sqrt_21, add_40, primals_234, reciprocal_21, sqrt_20, primals_63, primals_66, primals_235, convolution_20, primals_65, unsqueeze_168, unsqueeze_169, primals_67, unsqueeze_174, convolution_21, unsqueeze_172, primals_64, add_42, unsqueeze_161, mul_64, getitem_9, unsqueeze_171, unsqueeze_173, unsqueeze_160, sub_21, unsqueeze_162, unsqueeze_175, reciprocal_20, unsqueeze_170, mul_63, convolution_22, unsqueeze_167}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf63', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf62, i3 + 14 * i2 + 196 * ModularIndexing(116 + i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
    ranges=[2, 116, 14, 14],
    origins={getitem_11}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf64,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf63', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf62, i3 + 14 * i2 + 196 * ModularIndexing(116 + i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
      ranges=[2, 116, 14, 14],
      origins={getitem_11}
    )), InputBuffer(name='primals_70', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={getitem_11, primals_70, convolution_23}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf65', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf64, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_240, i1) * reciprocal(sqrt(load(primals_241, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_71, i1) + load(primals_72, i1)),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={sqrt_23, relu_15, primals_70, unsqueeze_185, unsqueeze_187, unsqueeze_184, add_46, unsqueeze_186, primals_72, reciprocal_23, primals_241, mul_69, add_47, primals_240, unsqueeze_191, convolution_23, sub_23, unsqueeze_190, unsqueeze_189, primals_71, unsqueeze_188, getitem_11, mul_70, mul_71}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf66,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf65', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf64, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_240, i1) * reciprocal(sqrt(load(primals_241, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_71, i1) + load(primals_72, i1)),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={sqrt_23, relu_15, primals_70, unsqueeze_185, unsqueeze_187, unsqueeze_184, add_46, unsqueeze_186, primals_72, reciprocal_23, primals_241, mul_69, add_47, primals_240, unsqueeze_191, convolution_23, sub_23, unsqueeze_190, unsqueeze_189, primals_71, unsqueeze_188, getitem_11, mul_70, mul_71}
    )), InputBuffer(name='primals_73', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
    kwargs={},
    output_view=None,
    origins={sqrt_23, primals_70, unsqueeze_185, relu_15, primals_73, unsqueeze_187, unsqueeze_184, add_46, unsqueeze_186, primals_72, reciprocal_23, primals_241, mul_69, add_47, primals_240, unsqueeze_191, convolution_23, sub_23, unsqueeze_190, unsqueeze_189, primals_71, unsqueeze_188, convolution_24, getitem_11, mul_70, mul_71}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf67', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf66, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_243, i1) * reciprocal(sqrt(load(primals_244, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_74, i1) + load(primals_75, i1),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={unsqueeze_199, mul_74, primals_70, unsqueeze_185, primals_73, primals_75, unsqueeze_184, unsqueeze_186, add_49, reciprocal_23, add_48, reciprocal_24, add_47, unsqueeze_191, sub_23, unsqueeze_190, unsqueeze_189, unsqueeze_188, getitem_11, mul_70, mul_71, sqrt_23, relu_15, sqrt_24, unsqueeze_187, unsqueeze_192, add_46, primals_244, primals_243, unsqueeze_193, primals_241, mul_69, unsqueeze_194, primals_240, primals_74, convolution_23, sub_24, unsqueeze_197, unsqueeze_196, mul_73, primals_72, convolution_24, unsqueeze_195, mul_72, unsqueeze_198, primals_71}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf68,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf67', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf66, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_243, i1) * reciprocal(sqrt(load(primals_244, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_74, i1) + load(primals_75, i1),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={unsqueeze_199, mul_74, primals_70, unsqueeze_185, primals_73, primals_75, unsqueeze_184, unsqueeze_186, add_49, reciprocal_23, add_48, reciprocal_24, add_47, unsqueeze_191, sub_23, unsqueeze_190, unsqueeze_189, unsqueeze_188, getitem_11, mul_70, mul_71, sqrt_23, relu_15, sqrt_24, unsqueeze_187, unsqueeze_192, add_46, primals_244, primals_243, unsqueeze_193, primals_241, mul_69, unsqueeze_194, primals_240, primals_74, convolution_23, sub_24, unsqueeze_197, unsqueeze_196, mul_73, primals_72, convolution_24, unsqueeze_195, mul_72, unsqueeze_198, primals_71}
    )), InputBuffer(name='primals_76', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={unsqueeze_199, mul_74, primals_70, unsqueeze_185, convolution_25, primals_73, primals_75, primals_76, unsqueeze_184, unsqueeze_186, add_49, reciprocal_23, add_48, reciprocal_24, add_47, unsqueeze_191, sub_23, unsqueeze_190, unsqueeze_189, unsqueeze_188, getitem_11, mul_70, mul_71, sqrt_23, relu_15, sqrt_24, unsqueeze_187, unsqueeze_192, add_46, primals_244, primals_243, unsqueeze_193, primals_241, mul_69, unsqueeze_194, primals_240, primals_74, convolution_23, sub_24, unsqueeze_197, unsqueeze_196, mul_73, primals_72, convolution_24, unsqueeze_195, mul_72, unsqueeze_198, primals_71}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf72', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf71, i3 + 14 * i2 + 196 * ModularIndexing(116 + i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
    ranges=[2, 116, 14, 14],
    origins={getitem_13}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf73,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf72', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf71, i3 + 14 * i2 + 196 * ModularIndexing(116 + i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
      ranges=[2, 116, 14, 14],
      origins={getitem_13}
    )), InputBuffer(name='primals_79', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={getitem_13, convolution_26, primals_79}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf74', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf73, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_249, i1) * reciprocal(sqrt(load(primals_250, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_80, i1) + load(primals_81, i1)),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={sub_26, unsqueeze_214, unsqueeze_212, mul_78, primals_250, mul_79, unsqueeze_211, primals_249, unsqueeze_213, add_52, unsqueeze_210, sqrt_26, unsqueeze_209, unsqueeze_208, primals_81, reciprocal_26, relu_17, primals_80, primals_79, getitem_13, add_53, mul_80, convolution_26, unsqueeze_215}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf75,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf74', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf73, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_249, i1) * reciprocal(sqrt(load(primals_250, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_80, i1) + load(primals_81, i1)),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={sub_26, unsqueeze_214, unsqueeze_212, mul_78, primals_250, mul_79, unsqueeze_211, primals_249, unsqueeze_213, add_52, unsqueeze_210, sqrt_26, unsqueeze_209, unsqueeze_208, primals_81, reciprocal_26, relu_17, primals_80, primals_79, getitem_13, add_53, mul_80, convolution_26, unsqueeze_215}
    )), InputBuffer(name='primals_82', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
    kwargs={},
    output_view=None,
    origins={sub_26, unsqueeze_214, unsqueeze_212, mul_78, primals_250, mul_79, unsqueeze_211, primals_249, unsqueeze_213, add_52, unsqueeze_210, sqrt_26, unsqueeze_209, unsqueeze_208, primals_82, primals_81, reciprocal_26, relu_17, primals_80, primals_79, getitem_13, convolution_27, add_53, mul_80, convolution_26, unsqueeze_215}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf76', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf75, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_252, i1) * reciprocal(sqrt(load(primals_253, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_83, i1) + load(primals_84, i1),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={unsqueeze_219, unsqueeze_218, unsqueeze_217, mul_82, mul_78, primals_250, unsqueeze_216, unsqueeze_221, primals_249, unsqueeze_220, mul_81, add_55, primals_252, sqrt_27, reciprocal_27, primals_82, primals_81, reciprocal_26, unsqueeze_222, relu_17, primals_80, primals_79, mul_83, unsqueeze_223, add_53, mul_80, unsqueeze_215, primals_253, sub_26, primals_83, unsqueeze_214, unsqueeze_212, mul_79, unsqueeze_211, sub_27, unsqueeze_213, add_52, unsqueeze_210, sqrt_26, unsqueeze_209, unsqueeze_208, primals_84, add_54, getitem_13, convolution_27, convolution_26}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf77,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf76', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf75, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_252, i1) * reciprocal(sqrt(load(primals_253, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_83, i1) + load(primals_84, i1),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={unsqueeze_219, unsqueeze_218, unsqueeze_217, mul_82, mul_78, primals_250, unsqueeze_216, unsqueeze_221, primals_249, unsqueeze_220, mul_81, add_55, primals_252, sqrt_27, reciprocal_27, primals_82, primals_81, reciprocal_26, unsqueeze_222, relu_17, primals_80, primals_79, mul_83, unsqueeze_223, add_53, mul_80, unsqueeze_215, primals_253, sub_26, primals_83, unsqueeze_214, unsqueeze_212, mul_79, unsqueeze_211, sub_27, unsqueeze_213, add_52, unsqueeze_210, sqrt_26, unsqueeze_209, unsqueeze_208, primals_84, add_54, getitem_13, convolution_27, convolution_26}
    )), InputBuffer(name='primals_85', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={unsqueeze_219, unsqueeze_218, unsqueeze_217, mul_82, mul_78, primals_250, unsqueeze_216, unsqueeze_221, convolution_28, primals_249, unsqueeze_220, mul_81, add_55, primals_252, sqrt_27, reciprocal_27, primals_82, primals_81, reciprocal_26, unsqueeze_222, relu_17, primals_80, primals_79, mul_83, unsqueeze_223, add_53, mul_80, unsqueeze_215, primals_253, sub_26, primals_83, unsqueeze_214, unsqueeze_212, mul_79, unsqueeze_211, sub_27, unsqueeze_213, add_52, unsqueeze_210, sqrt_26, unsqueeze_209, primals_85, unsqueeze_208, primals_84, add_54, getitem_13, convolution_27, convolution_26}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf81', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf80, i3 + 14 * i2 + 196 * ModularIndexing(116 + i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
    ranges=[2, 116, 14, 14],
    origins={getitem_15}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf82,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf81', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf80, i3 + 14 * i2 + 196 * ModularIndexing(116 + i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
      ranges=[2, 116, 14, 14],
      origins={getitem_15}
    )), InputBuffer(name='primals_88', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={convolution_29, primals_88, getitem_15}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf83', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf82, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_258, i1) * reciprocal(sqrt(load(primals_259, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_89, i1) + load(primals_90, i1)),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={sub_29, add_59, unsqueeze_234, mul_87, unsqueeze_233, unsqueeze_238, unsqueeze_232, add_58, unsqueeze_235, sqrt_29, relu_19, unsqueeze_236, primals_259, mul_88, primals_258, reciprocal_29, primals_88, getitem_15, primals_89, convolution_29, unsqueeze_237, primals_90, mul_89, unsqueeze_239}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf84,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf83', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf82, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_258, i1) * reciprocal(sqrt(load(primals_259, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_89, i1) + load(primals_90, i1)),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={sub_29, add_59, unsqueeze_234, mul_87, unsqueeze_233, unsqueeze_238, unsqueeze_232, add_58, unsqueeze_235, sqrt_29, relu_19, unsqueeze_236, primals_259, mul_88, primals_258, reciprocal_29, primals_88, getitem_15, primals_89, convolution_29, unsqueeze_237, primals_90, mul_89, unsqueeze_239}
    )), InputBuffer(name='primals_91', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
    kwargs={},
    output_view=None,
    origins={sub_29, add_59, unsqueeze_234, mul_87, unsqueeze_233, unsqueeze_238, unsqueeze_232, add_58, unsqueeze_235, sqrt_29, relu_19, unsqueeze_236, primals_259, mul_88, primals_258, reciprocal_29, primals_88, convolution_30, getitem_15, primals_89, convolution_29, unsqueeze_237, primals_91, primals_90, mul_89, unsqueeze_239}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf85', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf84, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_261, i1) * reciprocal(sqrt(load(primals_262, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_92, i1) + load(primals_93, i1),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={add_59, unsqueeze_241, unsqueeze_234, mul_87, unsqueeze_233, unsqueeze_243, unsqueeze_232, mul_92, primals_91, unsqueeze_235, primals_92, unsqueeze_236, sub_30, mul_88, unsqueeze_246, unsqueeze_245, unsqueeze_244, reciprocal_29, mul_90, unsqueeze_247, convolution_30, add_61, add_60, mul_91, convolution_29, mul_89, sub_29, reciprocal_30, primals_262, unsqueeze_238, add_58, sqrt_29, relu_19, primals_259, primals_258, primals_261, sqrt_30, primals_88, getitem_15, primals_89, unsqueeze_242, primals_93, unsqueeze_237, unsqueeze_240, primals_90, unsqueeze_239}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf86,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf85', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf84, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_261, i1) * reciprocal(sqrt(load(primals_262, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_92, i1) + load(primals_93, i1),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={add_59, unsqueeze_241, unsqueeze_234, mul_87, unsqueeze_233, unsqueeze_243, unsqueeze_232, mul_92, primals_91, unsqueeze_235, primals_92, unsqueeze_236, sub_30, mul_88, unsqueeze_246, unsqueeze_245, unsqueeze_244, reciprocal_29, mul_90, unsqueeze_247, convolution_30, add_61, add_60, mul_91, convolution_29, mul_89, sub_29, reciprocal_30, primals_262, unsqueeze_238, add_58, sqrt_29, relu_19, primals_259, primals_258, primals_261, sqrt_30, primals_88, getitem_15, primals_89, unsqueeze_242, primals_93, unsqueeze_237, unsqueeze_240, primals_90, unsqueeze_239}
    )), InputBuffer(name='primals_94', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={add_59, unsqueeze_241, unsqueeze_234, mul_87, unsqueeze_233, unsqueeze_243, unsqueeze_232, mul_92, primals_91, unsqueeze_235, primals_92, unsqueeze_236, sub_30, mul_88, unsqueeze_246, unsqueeze_245, unsqueeze_244, reciprocal_29, mul_90, unsqueeze_247, convolution_30, add_61, add_60, mul_91, convolution_29, convolution_31, mul_89, sub_29, reciprocal_30, primals_262, unsqueeze_238, add_58, sqrt_29, relu_19, primals_259, primals_258, primals_261, sqrt_30, primals_88, getitem_15, primals_89, unsqueeze_242, primals_94, primals_93, unsqueeze_237, unsqueeze_240, primals_90, unsqueeze_239}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf90', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf89, i3 + 14 * i2 + 196 * ModularIndexing(116 + i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
    ranges=[2, 116, 14, 14],
    origins={getitem_17}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf91,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf90', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf89, i3 + 14 * i2 + 196 * ModularIndexing(116 + i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
      ranges=[2, 116, 14, 14],
      origins={getitem_17}
    )), InputBuffer(name='primals_97', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={convolution_32, getitem_17, primals_97}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf92', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf91, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_267, i1) * reciprocal(sqrt(load(primals_268, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_98, i1) + load(primals_99, i1)),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={unsqueeze_261, primals_267, unsqueeze_260, primals_268, unsqueeze_259, primals_97, getitem_17, unsqueeze_258, primals_98, unsqueeze_257, primals_99, unsqueeze_256, mul_97, sqrt_32, sub_32, mul_98, reciprocal_32, add_65, convolution_32, relu_21, add_64, mul_96, unsqueeze_263, unsqueeze_262}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf93,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf92', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf91, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_267, i1) * reciprocal(sqrt(load(primals_268, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_98, i1) + load(primals_99, i1)),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={unsqueeze_261, primals_267, unsqueeze_260, primals_268, unsqueeze_259, primals_97, getitem_17, unsqueeze_258, primals_98, unsqueeze_257, primals_99, unsqueeze_256, mul_97, sqrt_32, sub_32, mul_98, reciprocal_32, add_65, convolution_32, relu_21, add_64, mul_96, unsqueeze_263, unsqueeze_262}
    )), InputBuffer(name='primals_100', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
    kwargs={},
    output_view=None,
    origins={unsqueeze_261, primals_267, unsqueeze_260, primals_268, unsqueeze_259, primals_97, getitem_17, primals_100, unsqueeze_258, primals_98, unsqueeze_257, primals_99, unsqueeze_256, mul_97, sqrt_32, sub_32, mul_98, reciprocal_32, add_65, convolution_32, convolution_33, relu_21, add_64, mul_96, unsqueeze_263, unsqueeze_262}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf94', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf93, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_270, i1) * reciprocal(sqrt(load(primals_271, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_101, i1) + load(primals_102, i1),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={primals_267, reciprocal_33, primals_268, primals_97, getitem_17, unsqueeze_266, unsqueeze_264, primals_98, primals_271, unsqueeze_265, primals_270, sqrt_32, mul_99, unsqueeze_267, mul_101, sub_33, convolution_32, unsqueeze_270, convolution_33, unsqueeze_269, unsqueeze_268, unsqueeze_271, add_67, mul_96, unsqueeze_263, unsqueeze_262, mul_100, unsqueeze_261, unsqueeze_260, primals_101, unsqueeze_259, add_66, primals_100, unsqueeze_258, unsqueeze_257, primals_99, unsqueeze_256, primals_102, mul_97, sub_32, mul_98, reciprocal_32, add_65, relu_21, add_64, sqrt_33}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf95,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf94', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf93, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_270, i1) * reciprocal(sqrt(load(primals_271, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_101, i1) + load(primals_102, i1),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={primals_267, reciprocal_33, primals_268, primals_97, getitem_17, unsqueeze_266, unsqueeze_264, primals_98, primals_271, unsqueeze_265, primals_270, sqrt_32, mul_99, unsqueeze_267, mul_101, sub_33, convolution_32, unsqueeze_270, convolution_33, unsqueeze_269, unsqueeze_268, unsqueeze_271, add_67, mul_96, unsqueeze_263, unsqueeze_262, mul_100, unsqueeze_261, unsqueeze_260, primals_101, unsqueeze_259, add_66, primals_100, unsqueeze_258, unsqueeze_257, primals_99, unsqueeze_256, primals_102, mul_97, sub_32, mul_98, reciprocal_32, add_65, relu_21, add_64, sqrt_33}
    )), InputBuffer(name='primals_103', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={primals_267, reciprocal_33, primals_268, getitem_17, primals_97, unsqueeze_266, unsqueeze_264, primals_98, primals_271, unsqueeze_265, primals_270, sqrt_32, mul_99, unsqueeze_267, mul_101, sub_33, convolution_32, unsqueeze_270, convolution_33, unsqueeze_269, unsqueeze_268, unsqueeze_271, add_67, mul_96, unsqueeze_263, unsqueeze_262, mul_100, unsqueeze_261, unsqueeze_260, primals_101, unsqueeze_259, convolution_34, add_66, primals_100, unsqueeze_258, unsqueeze_257, primals_99, unsqueeze_256, primals_102, primals_103, mul_97, sub_32, mul_98, reciprocal_32, add_65, relu_21, add_64, sqrt_33}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf99', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf98, i3 + 14 * i2 + 196 * ModularIndexing(116 + i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
    ranges=[2, 116, 14, 14],
    origins={getitem_19}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf100,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf99', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf98, i3 + 14 * i2 + 196 * ModularIndexing(116 + i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
      ranges=[2, 116, 14, 14],
      origins={getitem_19}
    )), InputBuffer(name='primals_106', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={primals_106, convolution_35, getitem_19}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf101', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf100, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_276, i1) * reciprocal(sqrt(load(primals_277, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_107, i1) + load(primals_108, i1)),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={add_70, primals_277, relu_23, sqrt_35, getitem_19, add_71, convolution_35, mul_107, primals_108, unsqueeze_287, sub_35, unsqueeze_286, primals_106, unsqueeze_284, reciprocal_35, mul_106, mul_105, unsqueeze_283, primals_276, unsqueeze_285, unsqueeze_282, unsqueeze_281, primals_107, unsqueeze_280}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf102,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf101', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf100, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_276, i1) * reciprocal(sqrt(load(primals_277, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_107, i1) + load(primals_108, i1)),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={add_70, primals_277, relu_23, sqrt_35, getitem_19, add_71, convolution_35, mul_107, primals_108, unsqueeze_287, sub_35, unsqueeze_286, primals_106, unsqueeze_284, reciprocal_35, mul_106, mul_105, unsqueeze_283, primals_276, unsqueeze_285, unsqueeze_282, unsqueeze_281, primals_107, unsqueeze_280}
    )), InputBuffer(name='primals_109', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
    kwargs={},
    output_view=None,
    origins={add_70, primals_277, relu_23, convolution_36, sqrt_35, getitem_19, add_71, convolution_35, mul_107, primals_108, unsqueeze_287, sub_35, primals_109, unsqueeze_286, primals_106, unsqueeze_284, reciprocal_35, mul_106, mul_105, unsqueeze_283, primals_276, unsqueeze_285, unsqueeze_282, unsqueeze_281, primals_107, unsqueeze_280}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf103', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf102, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_279, i1) * reciprocal(sqrt(load(primals_280, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_110, i1) + load(primals_111, i1),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={add_70, mul_108, convolution_36, sqrt_35, getitem_19, convolution_35, primals_108, unsqueeze_293, mul_109, unsqueeze_291, primals_106, unsqueeze_290, unsqueeze_292, unsqueeze_289, unsqueeze_294, primals_276, unsqueeze_288, sub_36, primals_107, sqrt_36, add_73, primals_277, relu_23, mul_110, primals_279, add_71, unsqueeze_295, add_72, mul_107, primals_280, unsqueeze_287, sub_35, primals_109, reciprocal_36, unsqueeze_286, primals_110, unsqueeze_284, primals_111, reciprocal_35, mul_106, mul_105, unsqueeze_283, unsqueeze_285, unsqueeze_282, unsqueeze_281, unsqueeze_280}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf104,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf103', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf102, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_279, i1) * reciprocal(sqrt(load(primals_280, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_110, i1) + load(primals_111, i1),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={add_70, mul_108, convolution_36, sqrt_35, getitem_19, convolution_35, primals_108, unsqueeze_293, mul_109, unsqueeze_291, primals_106, unsqueeze_290, unsqueeze_292, unsqueeze_289, unsqueeze_294, primals_276, unsqueeze_288, sub_36, primals_107, sqrt_36, add_73, primals_277, relu_23, mul_110, primals_279, add_71, unsqueeze_295, add_72, mul_107, primals_280, unsqueeze_287, sub_35, primals_109, reciprocal_36, unsqueeze_286, primals_110, unsqueeze_284, primals_111, reciprocal_35, mul_106, mul_105, unsqueeze_283, unsqueeze_285, unsqueeze_282, unsqueeze_281, unsqueeze_280}
    )), InputBuffer(name='primals_112', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={add_70, mul_108, convolution_36, sqrt_35, primals_112, getitem_19, convolution_35, primals_108, unsqueeze_293, mul_109, unsqueeze_291, primals_106, unsqueeze_290, unsqueeze_292, unsqueeze_289, unsqueeze_294, primals_276, unsqueeze_288, sub_36, primals_107, sqrt_36, add_73, primals_277, relu_23, mul_110, convolution_37, primals_279, add_71, unsqueeze_295, add_72, mul_107, primals_280, unsqueeze_287, sub_35, primals_109, reciprocal_36, unsqueeze_286, primals_110, unsqueeze_284, primals_111, reciprocal_35, mul_106, mul_105, unsqueeze_283, unsqueeze_285, unsqueeze_282, unsqueeze_281, unsqueeze_280}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf108', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf107, i3 + 14 * i2 + 196 * ModularIndexing(116 + i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
    ranges=[2, 116, 14, 14],
    origins={getitem_21}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf109,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf108', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf107, i3 + 14 * i2 + 196 * ModularIndexing(116 + i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
      ranges=[2, 116, 14, 14],
      origins={getitem_21}
    )), InputBuffer(name='primals_115', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={getitem_21, convolution_38, primals_115}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf110', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf109, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_285, i1) * reciprocal(sqrt(load(primals_286, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_116, i1) + load(primals_117, i1)),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={primals_115, primals_286, getitem_21, sqrt_38, unsqueeze_304, add_77, mul_116, unsqueeze_311, primals_117, relu_25, unsqueeze_310, add_76, unsqueeze_307, primals_116, primals_285, unsqueeze_309, unsqueeze_308, mul_114, mul_115, sub_38, unsqueeze_306, unsqueeze_305, convolution_38, reciprocal_38}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf111,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf110', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf109, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_285, i1) * reciprocal(sqrt(load(primals_286, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_116, i1) + load(primals_117, i1)),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={primals_115, primals_286, getitem_21, sqrt_38, unsqueeze_304, add_77, mul_116, unsqueeze_311, primals_117, relu_25, unsqueeze_310, add_76, unsqueeze_307, primals_116, primals_285, unsqueeze_309, unsqueeze_308, mul_114, mul_115, sub_38, unsqueeze_306, unsqueeze_305, convolution_38, reciprocal_38}
    )), InputBuffer(name='primals_118', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
    kwargs={},
    output_view=None,
    origins={convolution_38, primals_115, primals_286, getitem_21, sqrt_38, unsqueeze_304, add_77, mul_116, unsqueeze_311, primals_117, relu_25, unsqueeze_310, add_76, unsqueeze_307, primals_116, primals_285, unsqueeze_309, primals_118, unsqueeze_308, mul_114, mul_115, sub_38, unsqueeze_306, unsqueeze_305, convolution_39, reciprocal_38}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf112', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf111, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_288, i1) * reciprocal(sqrt(load(primals_289, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_119, i1) + load(primals_120, i1),
    ranges=torch.Size([2, 116, 14, 14]),
    origins={unsqueeze_314, primals_115, primals_288, primals_286, unsqueeze_313, sqrt_38, unsqueeze_315, primals_289, mul_119, primals_117, add_76, mul_118, primals_116, primals_118, primals_119, add_79, convolution_39, reciprocal_38, unsqueeze_316, reciprocal_39, sqrt_39, unsqueeze_319, getitem_21, unsqueeze_317, unsqueeze_304, add_78, primals_120, add_77, mul_117, mul_116, unsqueeze_312, unsqueeze_311, unsqueeze_318, sub_39, relu_25, unsqueeze_310, unsqueeze_307, primals_285, unsqueeze_309, unsqueeze_308, mul_114, mul_115, sub_38, unsqueeze_306, unsqueeze_305, convolution_38}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf113,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=(22736, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf112', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf111, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_288, i1) * reciprocal(sqrt(load(primals_289, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_119, i1) + load(primals_120, i1),
      ranges=torch.Size([2, 116, 14, 14]),
      origins={unsqueeze_314, primals_115, primals_288, primals_286, unsqueeze_313, sqrt_38, unsqueeze_315, primals_289, mul_119, primals_117, add_76, mul_118, primals_116, primals_118, primals_119, add_79, convolution_39, reciprocal_38, unsqueeze_316, reciprocal_39, sqrt_39, unsqueeze_319, getitem_21, unsqueeze_317, unsqueeze_304, add_78, primals_120, add_77, mul_117, mul_116, unsqueeze_312, unsqueeze_311, unsqueeze_318, sub_39, relu_25, unsqueeze_310, unsqueeze_307, primals_285, unsqueeze_309, unsqueeze_308, mul_114, mul_115, sub_38, unsqueeze_306, unsqueeze_305, convolution_38}
    )), InputBuffer(name='primals_121', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={unsqueeze_314, primals_115, primals_288, primals_286, unsqueeze_313, sqrt_38, unsqueeze_315, primals_289, mul_119, primals_117, add_76, mul_118, primals_116, primals_118, primals_119, add_79, convolution_39, reciprocal_38, primals_121, unsqueeze_316, reciprocal_39, sqrt_39, convolution_40, unsqueeze_319, getitem_21, unsqueeze_317, unsqueeze_304, add_78, primals_120, add_77, mul_117, mul_116, unsqueeze_312, unsqueeze_311, unsqueeze_318, sub_39, relu_25, unsqueeze_310, unsqueeze_307, primals_285, unsqueeze_309, unsqueeze_308, mul_114, mul_115, sub_38, unsqueeze_306, unsqueeze_305, convolution_38}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf117', layout=FixedLayout('cuda', torch.float32, size=(2, 232, 14, 14), stride=[45472, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf116, i3 + 14 * i2 + 196 * ModularIndexing(i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
    ranges=(2, 232, 14, 14),
    origins={view_23}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf118,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=(11368, 49, 7, 1)),
    inputs=[ComputedBuffer(name='buf117', layout=FixedLayout('cuda', torch.float32, size=(2, 232, 14, 14), stride=[45472, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf116, i3 + 14 * i2 + 196 * ModularIndexing(i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
      ranges=(2, 232, 14, 14),
      origins={view_23}
    )), InputBuffer(name='primals_124', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 232),
    kwargs={},
    output_view=None,
    origins={primals_124, view_23, convolution_41}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf119', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf118, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_294, i1) * reciprocal(sqrt(load(primals_295, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_125, i1) + load(primals_126, i1),
    ranges=torch.Size([2, 232, 7, 7]),
    origins={add_83, sqrt_41, add_82, primals_124, view_23, unsqueeze_334, reciprocal_41, unsqueeze_333, unsqueeze_330, primals_295, unsqueeze_335, primals_294, primals_126, unsqueeze_331, convolution_41, mul_125, primals_125, sub_41, unsqueeze_332, mul_124, mul_123, unsqueeze_328, unsqueeze_329}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf120,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=(11368, 49, 7, 1)),
    inputs=[ComputedBuffer(name='buf119', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf118, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_294, i1) * reciprocal(sqrt(load(primals_295, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_125, i1) + load(primals_126, i1),
      ranges=torch.Size([2, 232, 7, 7]),
      origins={add_83, sqrt_41, add_82, primals_124, view_23, unsqueeze_334, reciprocal_41, unsqueeze_333, unsqueeze_330, primals_295, unsqueeze_335, primals_294, primals_126, unsqueeze_331, convolution_41, mul_125, primals_125, sub_41, unsqueeze_332, mul_124, mul_123, unsqueeze_328, unsqueeze_329}
    )), InputBuffer(name='primals_127', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={add_83, sqrt_41, add_82, primals_124, view_23, unsqueeze_334, reciprocal_41, unsqueeze_333, unsqueeze_330, primals_295, unsqueeze_335, primals_294, convolution_42, unsqueeze_331, primals_126, convolution_41, mul_125, primals_125, sub_41, unsqueeze_332, primals_127, mul_124, mul_123, unsqueeze_328, unsqueeze_329}
  )
)), TensorBox(StorageBox(
  Convolution(
    name=buf122,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 14, 14]), stride=(45472, 196, 14, 1)),
    inputs=[ComputedBuffer(name='buf117', layout=FixedLayout('cuda', torch.float32, size=(2, 232, 14, 14), stride=[45472, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf116, i3 + 14 * i2 + 196 * ModularIndexing(i1, 2, 116) + 22736 * ModularIndexing(i1, 1, 2) + 45472 * i0),
      ranges=(2, 232, 14, 14),
      origins={view_23}
    )), InputBuffer(name='primals_130', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={convolution_43, primals_130, view_23}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf123', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 14, 14]), stride=[45472, 196, 14, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf122, i3 + 14 * i2 + 196 * i1 + 45472 * i0) - load(primals_300, i1) * reciprocal(sqrt(load(primals_301, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_131, i1) + load(primals_132, i1)),
    ranges=torch.Size([2, 232, 14, 14]),
    origins={reciprocal_43, primals_132, add_86, sqrt_43, primals_131, mul_131, unsqueeze_351, convolution_43, primals_130, view_23, unsqueeze_346, unsqueeze_344, sub_43, add_87, unsqueeze_347, mul_130, mul_129, relu_28, primals_301, unsqueeze_345, unsqueeze_349, primals_300, unsqueeze_350, unsqueeze_348}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf124,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=(11368, 49, 7, 1)),
    inputs=[ComputedBuffer(name='buf123', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 14, 14]), stride=[45472, 196, 14, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf122, i3 + 14 * i2 + 196 * i1 + 45472 * i0) - load(primals_300, i1) * reciprocal(sqrt(load(primals_301, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_131, i1) + load(primals_132, i1)),
      ranges=torch.Size([2, 232, 14, 14]),
      origins={reciprocal_43, primals_132, add_86, sqrt_43, primals_131, mul_131, unsqueeze_351, convolution_43, primals_130, view_23, unsqueeze_346, unsqueeze_344, sub_43, add_87, unsqueeze_347, mul_130, mul_129, relu_28, primals_301, unsqueeze_345, unsqueeze_349, primals_300, unsqueeze_350, unsqueeze_348}
    )), InputBuffer(name='primals_133', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 232),
    kwargs={},
    output_view=None,
    origins={reciprocal_43, primals_132, add_86, sqrt_43, primals_131, mul_131, unsqueeze_351, convolution_43, primals_130, view_23, unsqueeze_346, unsqueeze_344, sub_43, add_87, unsqueeze_347, mul_130, mul_129, relu_28, primals_301, unsqueeze_345, convolution_44, unsqueeze_349, primals_300, unsqueeze_350, unsqueeze_348, primals_133}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf125', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf124, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_303, i1) * reciprocal(sqrt(load(primals_304, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_134, i1) + load(primals_135, i1),
    ranges=torch.Size([2, 232, 7, 7]),
    origins={reciprocal_43, primals_132, add_86, sub_44, sqrt_43, primals_131, unsqueeze_358, unsqueeze_351, primals_130, unsqueeze_353, unsqueeze_344, sub_43, add_87, unsqueeze_347, unsqueeze_355, add_89, mul_130, mul_134, relu_28, mul_133, convolution_44, unsqueeze_349, unsqueeze_354, unsqueeze_348, reciprocal_44, unsqueeze_359, mul_131, convolution_43, unsqueeze_352, view_23, unsqueeze_346, add_88, unsqueeze_357, primals_304, unsqueeze_356, mul_129, primals_303, mul_132, primals_301, unsqueeze_345, primals_135, sqrt_44, primals_134, primals_300, unsqueeze_350, primals_133}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf126,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=(11368, 49, 7, 1)),
    inputs=[ComputedBuffer(name='buf125', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf124, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_303, i1) * reciprocal(sqrt(load(primals_304, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_134, i1) + load(primals_135, i1),
      ranges=torch.Size([2, 232, 7, 7]),
      origins={reciprocal_43, primals_132, add_86, sub_44, sqrt_43, primals_131, unsqueeze_358, unsqueeze_351, primals_130, unsqueeze_353, unsqueeze_344, sub_43, add_87, unsqueeze_347, unsqueeze_355, add_89, mul_130, mul_134, relu_28, mul_133, convolution_44, unsqueeze_349, unsqueeze_354, unsqueeze_348, reciprocal_44, unsqueeze_359, mul_131, convolution_43, unsqueeze_352, view_23, unsqueeze_346, add_88, unsqueeze_357, primals_304, unsqueeze_356, mul_129, primals_303, mul_132, primals_301, unsqueeze_345, primals_135, sqrt_44, primals_134, primals_300, unsqueeze_350, primals_133}
    )), InputBuffer(name='primals_136', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={convolution_45, reciprocal_43, primals_132, add_86, sub_44, sqrt_43, primals_131, unsqueeze_358, unsqueeze_351, primals_130, unsqueeze_353, unsqueeze_344, sub_43, add_87, unsqueeze_347, primals_136, unsqueeze_355, add_89, mul_130, mul_134, relu_28, mul_133, convolution_44, unsqueeze_349, unsqueeze_354, unsqueeze_348, reciprocal_44, unsqueeze_359, mul_131, convolution_43, unsqueeze_352, view_23, unsqueeze_346, add_88, unsqueeze_357, primals_304, unsqueeze_356, mul_129, primals_303, mul_132, primals_301, unsqueeze_345, primals_135, sqrt_44, primals_134, primals_300, unsqueeze_350, primals_133}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf129', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 7, 7], stride=[11368, 49, 7, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf128, i3 + 7 * i2 + 49 * ModularIndexing(232 + i1, 2, 232) + 11368 * ModularIndexing(i1, 1, 2) + 22736 * i0),
    ranges=[2, 232, 7, 7],
    origins={getitem_23}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf130,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=(11368, 49, 7, 1)),
    inputs=[ComputedBuffer(name='buf129', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 7, 7], stride=[11368, 49, 7, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf128, i3 + 7 * i2 + 49 * ModularIndexing(232 + i1, 2, 232) + 11368 * ModularIndexing(i1, 1, 2) + 22736 * i0),
      ranges=[2, 232, 7, 7],
      origins={getitem_23}
    )), InputBuffer(name='primals_139', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={convolution_46, primals_139, getitem_23}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf131', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf130, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_309, i1) * reciprocal(sqrt(load(primals_310, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_140, i1) + load(primals_141, i1)),
    ranges=torch.Size([2, 232, 7, 7]),
    origins={getitem_23, unsqueeze_373, primals_309, unsqueeze_369, convolution_46, unsqueeze_372, mul_138, sub_46, mul_140, reciprocal_46, unsqueeze_368, add_92, unsqueeze_370, unsqueeze_375, primals_141, relu_30, sqrt_46, add_93, primals_139, primals_140, unsqueeze_371, unsqueeze_374, primals_310, mul_139}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf132,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=(11368, 49, 7, 1)),
    inputs=[ComputedBuffer(name='buf131', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf130, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_309, i1) * reciprocal(sqrt(load(primals_310, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_140, i1) + load(primals_141, i1)),
      ranges=torch.Size([2, 232, 7, 7]),
      origins={getitem_23, unsqueeze_373, primals_309, unsqueeze_369, convolution_46, unsqueeze_372, mul_138, sub_46, mul_140, reciprocal_46, unsqueeze_368, add_92, unsqueeze_370, unsqueeze_375, primals_141, relu_30, sqrt_46, add_93, primals_139, primals_140, unsqueeze_371, unsqueeze_374, primals_310, mul_139}
    )), InputBuffer(name='primals_142', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 232),
    kwargs={},
    output_view=None,
    origins={getitem_23, unsqueeze_373, primals_309, unsqueeze_369, convolution_46, unsqueeze_372, mul_138, primals_142, sub_46, mul_140, reciprocal_46, unsqueeze_368, add_92, unsqueeze_370, unsqueeze_375, primals_141, relu_30, sqrt_46, add_93, primals_139, convolution_47, primals_140, unsqueeze_371, unsqueeze_374, primals_310, mul_139}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf133', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf132, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_312, i1) * reciprocal(sqrt(load(primals_313, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_143, i1) + load(primals_144, i1),
    ranges=torch.Size([2, 232, 7, 7]),
    origins={mul_143, unsqueeze_369, convolution_46, unsqueeze_372, unsqueeze_383, unsqueeze_379, sub_46, mul_140, mul_141, unsqueeze_368, add_92, unsqueeze_370, add_94, relu_30, sqrt_46, sqrt_47, primals_313, primals_312, primals_139, convolution_47, primals_140, unsqueeze_371, unsqueeze_374, primals_310, unsqueeze_378, mul_139, reciprocal_47, unsqueeze_380, getitem_23, unsqueeze_373, primals_309, primals_144, primals_143, mul_138, primals_142, unsqueeze_377, sub_47, reciprocal_46, unsqueeze_375, unsqueeze_381, primals_141, add_95, unsqueeze_382, mul_142, add_93, unsqueeze_376}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf134,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=(11368, 49, 7, 1)),
    inputs=[ComputedBuffer(name='buf133', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf132, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_312, i1) * reciprocal(sqrt(load(primals_313, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_143, i1) + load(primals_144, i1),
      ranges=torch.Size([2, 232, 7, 7]),
      origins={mul_143, unsqueeze_369, convolution_46, unsqueeze_372, unsqueeze_383, unsqueeze_379, sub_46, mul_140, mul_141, unsqueeze_368, add_92, unsqueeze_370, add_94, relu_30, sqrt_46, sqrt_47, primals_313, primals_312, primals_139, convolution_47, primals_140, unsqueeze_371, unsqueeze_374, primals_310, unsqueeze_378, mul_139, reciprocal_47, unsqueeze_380, getitem_23, unsqueeze_373, primals_309, primals_144, primals_143, mul_138, primals_142, unsqueeze_377, sub_47, reciprocal_46, unsqueeze_375, unsqueeze_381, primals_141, add_95, unsqueeze_382, mul_142, add_93, unsqueeze_376}
    )), InputBuffer(name='primals_145', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={convolution_48, mul_143, unsqueeze_369, convolution_46, unsqueeze_372, unsqueeze_383, unsqueeze_379, sub_46, mul_140, mul_141, unsqueeze_368, add_92, unsqueeze_370, add_94, relu_30, sqrt_46, sqrt_47, primals_313, primals_312, primals_139, convolution_47, primals_140, unsqueeze_371, unsqueeze_374, primals_310, unsqueeze_378, mul_139, reciprocal_47, unsqueeze_380, getitem_23, unsqueeze_373, primals_309, primals_144, primals_143, mul_138, primals_142, unsqueeze_377, sub_47, reciprocal_46, primals_145, unsqueeze_375, unsqueeze_381, primals_141, add_95, unsqueeze_382, mul_142, add_93, unsqueeze_376}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf138', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 7, 7], stride=[11368, 49, 7, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf137, i3 + 7 * i2 + 49 * ModularIndexing(232 + i1, 2, 232) + 11368 * ModularIndexing(i1, 1, 2) + 22736 * i0),
    ranges=[2, 232, 7, 7],
    origins={getitem_25}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf139,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=(11368, 49, 7, 1)),
    inputs=[ComputedBuffer(name='buf138', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 7, 7], stride=[11368, 49, 7, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf137, i3 + 7 * i2 + 49 * ModularIndexing(232 + i1, 2, 232) + 11368 * ModularIndexing(i1, 1, 2) + 22736 * i0),
      ranges=[2, 232, 7, 7],
      origins={getitem_25}
    )), InputBuffer(name='primals_148', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={getitem_25, convolution_49, primals_148}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf140', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf139, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_318, i1) * reciprocal(sqrt(load(primals_319, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_149, i1) + load(primals_150, i1)),
    ranges=torch.Size([2, 232, 7, 7]),
    origins={mul_147, primals_318, mul_149, unsqueeze_398, unsqueeze_393, sub_49, primals_319, mul_148, reciprocal_49, relu_32, convolution_49, unsqueeze_396, primals_150, getitem_25, unsqueeze_392, add_99, unsqueeze_394, unsqueeze_399, unsqueeze_395, primals_148, sqrt_49, unsqueeze_397, add_98, primals_149}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf141,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=(11368, 49, 7, 1)),
    inputs=[ComputedBuffer(name='buf140', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf139, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_318, i1) * reciprocal(sqrt(load(primals_319, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_149, i1) + load(primals_150, i1)),
      ranges=torch.Size([2, 232, 7, 7]),
      origins={mul_147, primals_318, mul_149, unsqueeze_398, unsqueeze_393, sub_49, primals_319, mul_148, reciprocal_49, relu_32, convolution_49, unsqueeze_396, primals_150, getitem_25, unsqueeze_392, add_99, unsqueeze_394, unsqueeze_399, unsqueeze_395, primals_148, sqrt_49, unsqueeze_397, add_98, primals_149}
    )), InputBuffer(name='primals_151', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 232),
    kwargs={},
    output_view=None,
    origins={mul_147, primals_151, primals_318, mul_149, unsqueeze_398, unsqueeze_393, sub_49, primals_319, convolution_50, mul_148, reciprocal_49, relu_32, convolution_49, unsqueeze_396, primals_150, getitem_25, unsqueeze_392, add_99, unsqueeze_394, unsqueeze_399, unsqueeze_395, primals_148, sqrt_49, unsqueeze_397, add_98, primals_149}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf142', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf141, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_321, i1) * reciprocal(sqrt(load(primals_322, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_152, i1) + load(primals_153, i1),
    ranges=torch.Size([2, 232, 7, 7]),
    origins={unsqueeze_404, primals_151, primals_152, mul_149, unsqueeze_398, unsqueeze_405, sub_49, mul_148, sub_50, add_100, reciprocal_49, relu_32, primals_153, unsqueeze_396, mul_152, unsqueeze_400, getitem_25, unsqueeze_392, unsqueeze_394, add_101, sqrt_49, reciprocal_50, mul_147, primals_318, unsqueeze_406, unsqueeze_393, primals_319, mul_150, unsqueeze_407, convolution_50, unsqueeze_403, convolution_49, unsqueeze_402, sqrt_50, primals_322, primals_150, add_99, unsqueeze_399, mul_151, unsqueeze_395, unsqueeze_401, primals_148, unsqueeze_397, primals_321, add_98, primals_149}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf143,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=(11368, 49, 7, 1)),
    inputs=[ComputedBuffer(name='buf142', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf141, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_321, i1) * reciprocal(sqrt(load(primals_322, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_152, i1) + load(primals_153, i1),
      ranges=torch.Size([2, 232, 7, 7]),
      origins={unsqueeze_404, primals_151, primals_152, mul_149, unsqueeze_398, unsqueeze_405, sub_49, mul_148, sub_50, add_100, reciprocal_49, relu_32, primals_153, unsqueeze_396, mul_152, unsqueeze_400, getitem_25, unsqueeze_392, unsqueeze_394, add_101, sqrt_49, reciprocal_50, mul_147, primals_318, unsqueeze_406, unsqueeze_393, primals_319, mul_150, unsqueeze_407, convolution_50, unsqueeze_403, convolution_49, unsqueeze_402, sqrt_50, primals_322, primals_150, add_99, unsqueeze_399, mul_151, unsqueeze_395, unsqueeze_401, primals_148, unsqueeze_397, primals_321, add_98, primals_149}
    )), InputBuffer(name='primals_154', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={unsqueeze_404, primals_151, primals_152, mul_149, unsqueeze_398, unsqueeze_405, sub_49, primals_154, mul_148, add_100, sub_50, reciprocal_49, primals_153, relu_32, unsqueeze_396, mul_152, unsqueeze_400, getitem_25, unsqueeze_392, unsqueeze_394, add_101, sqrt_49, reciprocal_50, mul_147, primals_318, unsqueeze_406, unsqueeze_393, primals_319, mul_150, unsqueeze_407, convolution_50, unsqueeze_403, convolution_49, unsqueeze_402, sqrt_50, primals_322, primals_150, add_99, unsqueeze_399, mul_151, unsqueeze_395, unsqueeze_401, primals_148, unsqueeze_397, convolution_51, primals_321, add_98, primals_149}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf147', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 7, 7], stride=[11368, 49, 7, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf146, i3 + 7 * i2 + 49 * ModularIndexing(232 + i1, 2, 232) + 11368 * ModularIndexing(i1, 1, 2) + 22736 * i0),
    ranges=[2, 232, 7, 7],
    origins={getitem_27}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf148,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=(11368, 49, 7, 1)),
    inputs=[ComputedBuffer(name='buf147', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 7, 7], stride=[11368, 49, 7, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf146, i3 + 7 * i2 + 49 * ModularIndexing(232 + i1, 2, 232) + 11368 * ModularIndexing(i1, 1, 2) + 22736 * i0),
      ranges=[2, 232, 7, 7],
      origins={getitem_27}
    )), InputBuffer(name='primals_157', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={getitem_27, primals_157, convolution_52}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf149', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf148, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_327, i1) * reciprocal(sqrt(load(primals_328, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_158, i1) + load(primals_159, i1)),
    ranges=torch.Size([2, 232, 7, 7]),
    origins={getitem_27, unsqueeze_417, sub_52, unsqueeze_420, primals_157, convolution_52, primals_328, unsqueeze_418, unsqueeze_416, mul_156, unsqueeze_423, add_104, add_105, sqrt_52, unsqueeze_419, relu_34, mul_157, unsqueeze_421, unsqueeze_422, primals_159, mul_158, reciprocal_52, primals_158, primals_327}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf150,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=(11368, 49, 7, 1)),
    inputs=[ComputedBuffer(name='buf149', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      relu(load(buf148, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_327, i1) * reciprocal(sqrt(load(primals_328, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_158, i1) + load(primals_159, i1)),
      ranges=torch.Size([2, 232, 7, 7]),
      origins={getitem_27, unsqueeze_417, sub_52, unsqueeze_420, primals_157, convolution_52, primals_328, unsqueeze_418, unsqueeze_416, mul_156, unsqueeze_423, add_104, add_105, sqrt_52, unsqueeze_419, relu_34, mul_157, unsqueeze_421, unsqueeze_422, primals_159, mul_158, reciprocal_52, primals_158, primals_327}
    )), InputBuffer(name='primals_160', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))],
    constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 232),
    kwargs={},
    output_view=None,
    origins={getitem_27, unsqueeze_417, sub_52, unsqueeze_420, primals_157, convolution_52, primals_328, unsqueeze_418, unsqueeze_416, mul_156, unsqueeze_423, add_104, add_105, sqrt_52, unsqueeze_419, convolution_53, primals_160, relu_34, mul_157, unsqueeze_421, unsqueeze_422, primals_159, mul_158, reciprocal_52, primals_158, primals_327}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf151', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf150, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_330, i1) * reciprocal(sqrt(load(primals_331, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_161, i1) + load(primals_162, i1),
    ranges=torch.Size([2, 232, 7, 7]),
    origins={getitem_27, unsqueeze_417, sub_53, convolution_52, primals_328, unsqueeze_418, mul_160, unsqueeze_429, add_104, add_105, unsqueeze_427, unsqueeze_419, primals_160, unsqueeze_421, unsqueeze_422, primals_159, primals_158, unsqueeze_425, unsqueeze_430, mul_159, primals_161, unsqueeze_426, sub_52, unsqueeze_420, primals_157, unsqueeze_424, primals_331, primals_330, unsqueeze_416, mul_156, primals_162, unsqueeze_428, unsqueeze_423, sqrt_52, unsqueeze_431, convolution_53, relu_34, mul_161, reciprocal_53, mul_157, mul_158, reciprocal_52, sqrt_53, add_107, add_106, primals_327}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf152,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=(11368, 49, 7, 1)),
    inputs=[ComputedBuffer(name='buf151', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf150, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_330, i1) * reciprocal(sqrt(load(primals_331, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_161, i1) + load(primals_162, i1),
      ranges=torch.Size([2, 232, 7, 7]),
      origins={getitem_27, unsqueeze_417, sub_53, convolution_52, primals_328, unsqueeze_418, mul_160, unsqueeze_429, add_104, add_105, unsqueeze_427, unsqueeze_419, primals_160, unsqueeze_421, unsqueeze_422, primals_159, primals_158, unsqueeze_425, unsqueeze_430, mul_159, primals_161, unsqueeze_426, sub_52, unsqueeze_420, primals_157, unsqueeze_424, primals_331, primals_330, unsqueeze_416, mul_156, primals_162, unsqueeze_428, unsqueeze_423, sqrt_52, unsqueeze_431, convolution_53, relu_34, mul_161, reciprocal_53, mul_157, mul_158, reciprocal_52, sqrt_53, add_107, add_106, primals_327}
    )), InputBuffer(name='primals_163', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={getitem_27, unsqueeze_417, sub_53, convolution_52, primals_328, unsqueeze_418, mul_160, unsqueeze_429, add_104, add_105, unsqueeze_427, convolution_54, unsqueeze_419, primals_160, unsqueeze_421, unsqueeze_422, primals_159, primals_158, unsqueeze_425, unsqueeze_430, mul_159, primals_161, unsqueeze_426, sub_52, unsqueeze_420, primals_157, unsqueeze_424, primals_331, primals_163, primals_330, unsqueeze_416, mul_156, primals_162, unsqueeze_428, unsqueeze_423, sqrt_52, unsqueeze_431, convolution_53, relu_34, mul_161, reciprocal_53, mul_157, mul_158, reciprocal_52, sqrt_53, add_107, add_106, primals_327}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf156', layout=FixedLayout('cuda', torch.float32, size=(2, 464, 7, 7), stride=[22736, 49, 7, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf155, i3 + 7 * i2 + 49 * ModularIndexing(i1, 2, 232) + 11368 * ModularIndexing(i1, 1, 2) + 22736 * i0),
    ranges=(2, 464, 7, 7),
    origins={view_31}
  ))
)), TensorBox(StorageBox(
  Convolution(
    name=buf157,
    layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 1024, 7, 7]), stride=(50176, 49, 7, 1)),
    inputs=[ComputedBuffer(name='buf156', layout=FixedLayout('cuda', torch.float32, size=(2, 464, 7, 7), stride=[22736, 49, 7, 1]), data=Pointwise(
      'cuda',
      torch.float32,
      load(buf155, i3 + 7 * i2 + 49 * ModularIndexing(i1, 2, 232) + 11368 * ModularIndexing(i1, 1, 2) + 22736 * i0),
      ranges=(2, 464, 7, 7),
      origins={view_31}
    )), InputBuffer(name='primals_166', layout=FixedLayout('cuda', torch.float32, size=[1024, 464, 1, 1], stride=[464, 1, 1, 1]))],
    constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
    kwargs={},
    output_view=None,
    origins={convolution_55, view_31, primals_166}
  )
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf160', layout=FlexibleLayout('cuda', torch.float32, size=[2, 1024], stride=[1024, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    load(buf159, i1 + 1024 * i0) / index_expr(49, torch.float32),
    ranges=[2, 1024],
    origins={primals_166, relu_36, unsqueeze_442, convolution_55, unsqueeze_440, sqrt_55, unsqueeze_446, mul_165, view_31, mul_167, reciprocal_55, unsqueeze_447, add_110, mean, add_111, primals_336, unsqueeze_443, primals_168, unsqueeze_441, primals_167, unsqueeze_445, unsqueeze_444, primals_337, sub_55, mul_166}
  ))
)), TensorBox(
  ReinterpretView(
    StorageBox(
      InputBuffer(name='primals_169', layout=FixedLayout('cuda', torch.float32, size=[1000, 1024], stride=[1024, 1]))
    ),
    FixedLayout('cuda', torch.float32, size=[1000, 1024], stride=[1024, 1]),
    no origins?
  )
), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf158, i3 + 7 * i2 + 49 * i1 + 50176 * i0) <= constant(0, torch.float32),
    ranges=torch.Size([2, 1024, 7, 7]),
    origins={primals_166, relu_36, unsqueeze_442, convolution_55, unsqueeze_440, sqrt_55, unsqueeze_446, mul_165, view_31, mul_167, le, reciprocal_55, unsqueeze_447, add_110, add_111, primals_336, unsqueeze_443, primals_168, unsqueeze_441, primals_167, unsqueeze_445, unsqueeze_444, primals_337, sub_55, mul_166}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf153, i3 + 7 * i2 + 49 * i1 + 22736 * i0) <= constant(0, torch.float32),
    ranges=[2, 232, 7, 7],
    origins={unsqueeze_432, getitem_27, mul_163, convolution_52, primals_328, unsqueeze_418, mul_160, unsqueeze_429, relu_35, mul_164, add_105, unsqueeze_427, unsqueeze_419, unsqueeze_421, unsqueeze_422, unsqueeze_437, unsqueeze_425, mul_162, unsqueeze_420, primals_331, add_109, primals_330, primals_333, sub_54, primals_334, le_1, sqrt_52, unsqueeze_431, add_108, convolution_53, mul_158, reciprocal_52, sqrt_53, add_107, unsqueeze_417, sub_53, unsqueeze_439, unsqueeze_435, reciprocal_54, add_104, unsqueeze_433, unsqueeze_438, sqrt_54, convolution_54, unsqueeze_436, primals_160, primals_159, primals_158, unsqueeze_430, unsqueeze_434, mul_159, primals_161, unsqueeze_426, sub_52, primals_157, unsqueeze_424, primals_163, unsqueeze_416, mul_156, primals_162, unsqueeze_428, unsqueeze_423, primals_164, primals_165, reciprocal_53, relu_34, mul_161, mul_157, add_106, primals_327}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf144, i3 + 7 * i2 + 49 * i1 + 22736 * i0) <= constant(0, torch.float32),
    ranges=[2, 232, 7, 7],
    origins={unsqueeze_404, unsqueeze_408, unsqueeze_398, reciprocal_51, unsqueeze_409, add_100, sqrt_51, add_102, relu_32, unsqueeze_411, mul_152, unsqueeze_400, unsqueeze_392, unsqueeze_394, unsqueeze_415, sqrt_49, mul_147, unsqueeze_406, relu_33, mul_150, mul_155, unsqueeze_403, convolution_49, unsqueeze_413, primals_150, unsqueeze_399, le_3, primals_148, mul_153, primals_149, primals_151, primals_152, mul_149, unsqueeze_405, sub_49, primals_154, mul_148, sub_50, reciprocal_49, primals_153, primals_155, mul_154, unsqueeze_396, primals_156, getitem_25, unsqueeze_414, add_101, reciprocal_50, primals_318, unsqueeze_393, primals_319, unsqueeze_407, unsqueeze_410, convolution_50, primals_324, unsqueeze_402, sqrt_50, primals_322, add_99, mul_151, add_103, unsqueeze_395, unsqueeze_401, sub_51, unsqueeze_397, primals_325, unsqueeze_412, convolution_51, primals_321, add_98}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf135, i3 + 7 * i2 + 49 * i1 + 22736 * i0) <= constant(0, torch.float32),
    ranges=[2, 232, 7, 7],
    origins={add_97, unsqueeze_369, convolution_46, unsqueeze_383, sub_46, mul_140, relu_31, unsqueeze_368, add_92, primals_315, sqrt_46, primals_313, primals_312, primals_139, primals_140, unsqueeze_371, unsqueeze_378, reciprocal_47, primals_310, unsqueeze_373, primals_309, primals_144, primals_143, unsqueeze_386, mul_138, primals_142, unsqueeze_377, primals_145, sub_48, unsqueeze_375, primals_141, le_5, unsqueeze_382, unsqueeze_389, unsqueeze_376, unsqueeze_391, primals_147, unsqueeze_387, add_96, primals_146, reciprocal_48, convolution_48, mul_143, unsqueeze_372, unsqueeze_379, unsqueeze_385, mul_141, unsqueeze_370, add_94, unsqueeze_384, relu_30, sqrt_47, mul_145, primals_316, convolution_47, unsqueeze_380, unsqueeze_374, mul_139, unsqueeze_388, getitem_23, mul_144, sqrt_48, sub_47, reciprocal_46, unsqueeze_381, mul_146, add_95, mul_142, add_93, unsqueeze_390}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf127, i3 + 7 * i2 + 49 * i1 + 22736 * i0) <= constant(0, torch.float32),
    ranges=[2, 232, 7, 7],
    origins={convolution_45, reciprocal_43, primals_132, primals_131, unsqueeze_358, unsqueeze_351, le_7, primals_130, unsqueeze_353, sub_43, unsqueeze_363, primals_136, unsqueeze_355, mul_134, sub_45, primals_138, primals_137, convolution_44, relu_29, reciprocal_44, unsqueeze_359, mul_131, unsqueeze_352, primals_307, view_23, unsqueeze_362, primals_306, add_88, unsqueeze_357, primals_304, sqrt_45, primals_303, mul_137, primals_301, add_90, sqrt_44, primals_300, unsqueeze_361, add_86, sub_44, sqrt_43, reciprocal_45, mul_136, unsqueeze_344, add_87, unsqueeze_347, add_89, mul_130, unsqueeze_360, relu_28, mul_133, unsqueeze_349, unsqueeze_354, unsqueeze_348, mul_135, convolution_43, unsqueeze_365, unsqueeze_346, unsqueeze_367, unsqueeze_356, mul_129, unsqueeze_364, mul_132, add_91, unsqueeze_345, primals_135, primals_134, unsqueeze_350, unsqueeze_366, primals_133}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf121, i3 + 7 * i2 + 49 * i1 + 22736 * i0) <= constant(0, torch.float32),
    ranges=[2, 232, 7, 7],
    origins={add_83, primals_298, mul_126, sqrt_41, primals_297, sub_42, reciprocal_41, relu_27, primals_295, unsqueeze_342, primals_294, unsqueeze_331, reciprocal_42, primals_129, le_9, sub_41, mul_124, unsqueeze_337, add_85, unsqueeze_329, add_82, primals_124, view_23, unsqueeze_334, mul_123, unsqueeze_343, unsqueeze_333, unsqueeze_341, unsqueeze_330, unsqueeze_335, primals_126, convolution_41, convolution_42, unsqueeze_339, unsqueeze_340, unsqueeze_336, mul_125, primals_125, unsqueeze_332, primals_127, primals_128, unsqueeze_338, add_84, sqrt_42, mul_128, unsqueeze_328, mul_127}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf114, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
    ranges=[2, 116, 14, 14],
    origins={unsqueeze_314, primals_288, primals_286, sqrt_40, primals_289, mul_121, relu_26, reciprocal_40, unsqueeze_325, add_80, le_10, sub_40, unsqueeze_326, add_81, convolution_39, unsqueeze_316, unsqueeze_312, unsqueeze_320, mul_114, convolution_38, primals_115, unsqueeze_313, sqrt_38, unsqueeze_315, mul_119, primals_117, add_76, mul_118, primals_292, primals_116, unsqueeze_321, primals_118, primals_119, add_79, unsqueeze_323, primals_123, primals_291, primals_122, reciprocal_38, unsqueeze_322, primals_121, reciprocal_39, sqrt_39, convolution_40, unsqueeze_319, getitem_21, unsqueeze_317, add_78, primals_120, add_77, mul_117, mul_116, unsqueeze_311, unsqueeze_318, sub_39, relu_25, unsqueeze_310, unsqueeze_307, primals_285, unsqueeze_309, unsqueeze_324, unsqueeze_308, mul_115, mul_120, sub_38, unsqueeze_327, unsqueeze_306, mul_122, unsqueeze_305, unsqueeze_304}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf105, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
    ranges=[2, 116, 14, 14],
    origins={primals_282, mul_108, convolution_36, le_12, convolution_35, primals_108, primals_106, unsqueeze_292, unsqueeze_294, sub_36, primals_107, add_73, mul_110, sqrt_37, convolution_37, unsqueeze_295, add_72, unsqueeze_297, unsqueeze_296, primals_109, primals_110, primals_111, reciprocal_35, unsqueeze_298, primals_114, mul_113, sub_37, primals_113, unsqueeze_302, unsqueeze_301, unsqueeze_300, add_70, unsqueeze_303, mul_112, unsqueeze_299, add_75, sqrt_35, primals_112, getitem_19, unsqueeze_293, mul_109, unsqueeze_291, reciprocal_37, unsqueeze_290, unsqueeze_289, primals_276, unsqueeze_288, add_74, sqrt_36, relu_24, primals_277, relu_23, mul_111, primals_279, add_71, mul_107, primals_280, unsqueeze_287, sub_35, reciprocal_36, unsqueeze_286, unsqueeze_284, mul_106, mul_105, unsqueeze_283, primals_283, unsqueeze_285, unsqueeze_282, unsqueeze_281, unsqueeze_280}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf96, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
    ranges=[2, 116, 14, 14],
    origins={reciprocal_33, primals_97, unsqueeze_266, unsqueeze_264, primals_98, add_68, unsqueeze_265, primals_104, reciprocal_34, unsqueeze_267, mul_101, sub_33, unsqueeze_270, primals_105, unsqueeze_269, unsqueeze_268, unsqueeze_271, add_67, mul_100, primals_101, convolution_34, add_66, primals_100, sqrt_34, primals_99, primals_102, primals_103, unsqueeze_272, unsqueeze_276, reciprocal_32, unsqueeze_275, unsqueeze_274, unsqueeze_273, mul_103, unsqueeze_277, unsqueeze_278, add_64, primals_267, add_69, primals_268, mul_104, getitem_17, unsqueeze_279, primals_271, relu_22, primals_270, sub_34, sqrt_32, mul_99, primals_273, convolution_32, convolution_33, primals_274, mul_96, unsqueeze_263, unsqueeze_262, unsqueeze_261, unsqueeze_260, unsqueeze_259, unsqueeze_258, unsqueeze_257, unsqueeze_256, mul_97, le_14, sub_32, mul_98, add_65, relu_21, mul_102, sqrt_33}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf87, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
    ranges=[2, 116, 14, 14],
    origins={add_62, unsqueeze_241, reciprocal_31, mul_87, unsqueeze_243, mul_92, primals_96, primals_91, sub_30, unsqueeze_246, unsqueeze_245, unsqueeze_244, le_16, reciprocal_29, unsqueeze_247, add_61, add_60, mul_91, convolution_31, sqrt_31, primals_262, mul_95, sub_31, add_58, unsqueeze_254, sqrt_29, unsqueeze_253, primals_259, mul_94, unsqueeze_251, primals_258, primals_261, unsqueeze_250, unsqueeze_249, getitem_15, primals_264, unsqueeze_252, unsqueeze_255, add_63, primals_94, primals_95, unsqueeze_237, unsqueeze_248, unsqueeze_239, primals_265, add_59, relu_20, unsqueeze_234, unsqueeze_233, unsqueeze_232, unsqueeze_235, primals_92, unsqueeze_236, mul_88, mul_90, convolution_30, convolution_29, mul_89, sub_29, reciprocal_30, unsqueeze_238, relu_19, mul_93, sqrt_30, primals_88, primals_89, unsqueeze_242, primals_93, unsqueeze_240, primals_90}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf78, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
    ranges=[2, 116, 14, 14],
    origins={sqrt_28, primals_250, primals_249, convolution_28, mul_81, primals_252, add_56, reciprocal_26, unsqueeze_225, primals_256, primals_255, unsqueeze_224, unsqueeze_226, unsqueeze_223, unsqueeze_227, mul_85, reciprocal_28, add_57, primals_253, unsqueeze_231, sub_28, unsqueeze_230, unsqueeze_229, mul_86, add_52, sqrt_26, unsqueeze_228, relu_18, getitem_13, unsqueeze_219, unsqueeze_218, unsqueeze_217, mul_82, mul_78, unsqueeze_216, unsqueeze_221, unsqueeze_220, add_55, sqrt_27, reciprocal_27, primals_82, primals_81, unsqueeze_222, relu_17, primals_80, primals_79, mul_83, le_18, add_53, mul_80, unsqueeze_215, sub_26, primals_83, unsqueeze_214, unsqueeze_212, mul_79, unsqueeze_211, primals_87, sub_27, mul_84, unsqueeze_213, primals_86, unsqueeze_210, unsqueeze_209, primals_85, unsqueeze_208, primals_84, add_54, convolution_27, convolution_26}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf69, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
    ranges=[2, 116, 14, 14],
    origins={unsqueeze_199, mul_74, reciprocal_25, convolution_25, sqrt_25, add_49, reciprocal_23, add_48, unsqueeze_201, unsqueeze_200, unsqueeze_202, unsqueeze_203, getitem_11, mul_76, sqrt_23, add_51, unsqueeze_207, sub_25, unsqueeze_206, unsqueeze_205, mul_77, add_46, primals_247, unsqueeze_204, mul_69, relu_16, primals_74, convolution_23, primals_72, convolution_24, mul_72, primals_71, primals_70, unsqueeze_185, primals_73, primals_75, primals_76, unsqueeze_184, unsqueeze_186, mul_75, reciprocal_24, add_47, unsqueeze_191, sub_23, unsqueeze_190, primals_78, unsqueeze_189, unsqueeze_188, mul_70, mul_71, relu_15, primals_77, sqrt_24, unsqueeze_187, unsqueeze_192, primals_244, add_50, primals_243, unsqueeze_193, primals_241, unsqueeze_194, primals_240, sub_24, le_20, unsqueeze_197, unsqueeze_196, mul_73, primals_246, unsqueeze_195, unsqueeze_198}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf60, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
    ranges=[2, 116, 14, 14],
    origins={mul_60, add_43, primals_61, le_22, primals_62, mul_65, unsqueeze_177, unsqueeze_176, unsqueeze_178, unsqueeze_179, mul_67, sqrt_20, primals_63, primals_66, add_45, convolution_20, primals_65, mul_68, unsqueeze_183, sub_22, primals_67, unsqueeze_182, primals_68, unsqueeze_181, convolution_21, relu_14, primals_64, unsqueeze_161, unsqueeze_180, unsqueeze_160, unsqueeze_162, mul_63, unsqueeze_167, unsqueeze_166, unsqueeze_165, unsqueeze_164, mul_61, primals_69, sub_20, mul_62, add_41, unsqueeze_163, primals_232, relu_13, primals_231, mul_66, sqrt_21, add_40, primals_234, reciprocal_21, add_44, primals_235, primals_237, primals_238, unsqueeze_168, unsqueeze_169, unsqueeze_174, unsqueeze_172, add_42, mul_64, getitem_9, unsqueeze_171, reciprocal_22, unsqueeze_173, sub_21, unsqueeze_175, reciprocal_20, unsqueeze_170, convolution_22, sqrt_22}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf52, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
    ranges=[2, 116, 14, 14],
    origins={mul_56, unsqueeze_151, unsqueeze_150, unsqueeze_149, unsqueeze_148, mul_55, mul_51, sub_18, unsqueeze_147, reciprocal_19, unsqueeze_146, primals_59, unsqueeze_144, primals_58, add_38, primals_57, primals_56, unsqueeze_145, primals_55, primals_60, reciprocal_17, add_36, convolution_18, primals_223, sqrt_17, mul_54, unsqueeze_136, primals_225, unsqueeze_141, mul_52, primals_222, unsqueeze_139, unsqueeze_138, unsqueeze_140, unsqueeze_142, sub_17, unsqueeze_137, primals_226, primals_228, mul_57, primals_229, reciprocal_18, add_35, mul_53, relu_11, sqrt_18, unsqueeze_143, relu_12, view_7, add_34, add_39, le_24, mul_59, sub_19, unsqueeze_158, unsqueeze_159, unsqueeze_157, mul_58, unsqueeze_155, unsqueeze_154, primals_52, unsqueeze_156, convolution_17, unsqueeze_153, primals_53, unsqueeze_152, sqrt_19, convolution_19, add_37, primals_54}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf46, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
    ranges=[2, 116, 14, 14],
    origins={sqrt_15, unsqueeze_128, mul_49, mul_45, primals_217, unsqueeze_130, unsqueeze_129, primals_216, unsqueeze_131, unsqueeze_132, unsqueeze_133, add_33, view_7, mul_50, unsqueeze_135, sub_16, primals_219, reciprocal_16, mul_46, primals_220, unsqueeze_123, unsqueeze_121, le_26, unsqueeze_134, mul_48, convolution_15, convolution_16, add_32, sqrt_16, unsqueeze_120, primals_50, add_30, sub_15, unsqueeze_124, primals_49, relu_10, unsqueeze_122, primals_48, reciprocal_15, add_31, primals_47, mul_47, unsqueeze_127, primals_46, unsqueeze_126, primals_51, unsqueeze_125}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf39, i3 + 28 * i2 + 784 * i1 + 90944 * i0) <= constant(0, torch.float32),
    ranges=[2, 58, 28, 28],
    origins={primals_211, le_27, primals_210, sqrt_13, primals_213, relu_8, mul_42, unsqueeze_108, unsqueeze_107, reciprocal_14, unsqueeze_106, unsqueeze_105, mul_40, unsqueeze_109, unsqueeze_110, primals_214, unsqueeze_104, sqrt_14, add_27, add_26, mul_41, unsqueeze_111, reciprocal_12, convolution_14, sub_13, unsqueeze_116, unsqueeze_115, add_24, unsqueeze_114, unsqueeze_112, sqrt_12, unsqueeze_113, mul_36, mul_43, unsqueeze_97, unsqueeze_117, unsqueeze_118, unsqueeze_100, primals_38, add_29, getitem_7, primals_37, mul_44, unsqueeze_119, mul_39, sub_14, convolution_13, unsqueeze_96, primals_39, convolution_12, primals_44, primals_43, primals_42, primals_41, primals_40, relu_9, unsqueeze_99, mul_37, primals_45, unsqueeze_101, unsqueeze_102, unsqueeze_98, reciprocal_13, primals_208, mul_38, primals_207, unsqueeze_103, add_25, sub_12, add_28}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf30, i3 + 28 * i2 + 784 * i1 + 90944 * i0) <= constant(0, torch.float32),
    ranges=[2, 58, 28, 28],
    origins={reciprocal_11, add_19, add_21, mul_32, unsqueeze_87, sub_10, unsqueeze_86, unsqueeze_85, unsqueeze_84, mul_31, sqrt_11, add_20, unsqueeze_83, unsqueeze_91, unsqueeze_90, getitem_5, unsqueeze_89, sqrt_9, add_18, unsqueeze_88, primals_202, primals_201, le_29, mul_34, unsqueeze_92, convolution_11, primals_198, reciprocal_9, primals_199, mul_27, mul_35, primals_30, unsqueeze_95, sub_11, unsqueeze_94, add_23, primals_32, unsqueeze_73, primals_31, primals_33, unsqueeze_93, primals_34, unsqueeze_72, mul_30, convolution_10, primals_36, convolution_9, primals_35, unsqueeze_74, unsqueeze_77, mul_28, relu_7, unsqueeze_75, unsqueeze_76, sub_9, mul_29, unsqueeze_78, unsqueeze_79, unsqueeze_81, primals_204, primals_29, primals_28, add_22, sqrt_10, relu_6, mul_33, reciprocal_10, primals_205, unsqueeze_80, unsqueeze_82}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf21, i3 + 28 * i2 + 784 * i1 + 90944 * i0) <= constant(0, torch.float32),
    ranges=[2, 58, 28, 28],
    origins={unsqueeze_60, unsqueeze_62, sub_7, add_12, primals_20, unsqueeze_57, primals_19, sqrt_8, add_14, convolution_8, reciprocal_6, add_15, mul_23, unsqueeze_64, unsqueeze_63, add_13, mul_20, unsqueeze_55, add_17, sub_6, unsqueeze_54, unsqueeze_71, unsqueeze_53, sub_8, unsqueeze_52, unsqueeze_70, unsqueeze_51, unsqueeze_69, unsqueeze_68, mul_19, mul_25, unsqueeze_50, unsqueeze_67, unsqueeze_48, unsqueeze_66, unsqueeze_65, mul_26, le_31, unsqueeze_49, primals_196, relu_5, convolution_7, primals_195, mul_21, primals_193, convolution_6, primals_192, primals_190, primals_189, sqrt_6, getitem_3, reciprocal_7, mul_24, primals_27, sqrt_7, primals_26, add_16, primals_25, unsqueeze_56, primals_24, relu_4, unsqueeze_61, primals_23, mul_22, primals_22, unsqueeze_59, reciprocal_8, mul_18, unsqueeze_58, primals_21}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf13, i3 + 28 * i2 + 784 * i1 + 90944 * i0) <= constant(0, torch.float32),
    ranges=[2, 58, 28, 28],
    origins={add_9, mul_15, primals_172, sqrt_5, primals_171, mul_14, primals_17, mul, reciprocal_5, primals_16, add_6, primals_18, unsqueeze_40, unsqueeze_41, primals_15, unsqueeze_42, primals_13, convolution_5, primals_339, convolution_3, primals_12, primals_14, primals_11, primals_10, primals_1, mul_9, unsqueeze_35, convolution, primals_2, reciprocal_3, add, reciprocal, mul_12, le_33, sqrt_3, sqrt, add_8, convolution_4, unsqueeze_1, relu_3, unsqueeze_24, unsqueeze, unsqueeze_25, unsqueeze_29, primals_186, mul_10, unsqueeze_2, add_11, unsqueeze_3, unsqueeze_27, mul_17, mul_1, primals_187, sub_5, unsqueeze_34, unsqueeze_26, unsqueeze_46, max_pool2d_with_indices, primals_184, unsqueeze_45, mul_2, unsqueeze_28, unsqueeze_44, sub, unsqueeze_30, mul_16, unsqueeze_6, sub_3, unsqueeze_43, unsqueeze_5, primals_3, unsqueeze_47, unsqueeze_4, primals_181, unsqueeze_7, add_7, add_1, primals_180, mul_11, primals_183, relu_2, unsqueeze_39, relu, unsqueeze_31, add_10, unsqueeze_37, sqrt_4, reciprocal_4, unsqueeze_36, sub_4, mul_13, unsqueeze_33, unsqueeze_38, unsqueeze_32}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf7, i3 + 28 * i2 + 784 * i1 + 90944 * i0) <= constant(0, torch.float32),
    ranges=[2, 58, 28, 28],
    origins={unsqueeze_10, unsqueeze_9, mul_6, primals_172, unsqueeze_8, primals_171, unsqueeze_14, sub_1, primals_174, mul, primals_178, primals_177, add_3, mul_5, sqrt_2, convolution_2, primals_339, unsqueeze_15, unsqueeze_18, primals_175, unsqueeze_16, unsqueeze_17, primals_1, primals_6, unsqueeze_19, primals_9, convolution, primals_2, add, add_5, unsqueeze_23, sub_2, unsqueeze_22, unsqueeze_21, unsqueeze_20, le_35, mul_7, mul_8, reciprocal, relu_1, reciprocal_1, sqrt, unsqueeze_1, unsqueeze, unsqueeze_2, primals_4, unsqueeze_3, mul_1, max_pool2d_with_indices, primals_7, mul_2, sub, unsqueeze_6, primals_3, unsqueeze_5, primals_5, unsqueeze_4, unsqueeze_7, add_1, primals_8, sqrt_1, relu, mul_3, add_4, convolution_1, unsqueeze_13, unsqueeze_12, mul_4, add_2, unsqueeze_11, reciprocal_2}
  )
)), s0, 28, 28, 14, 14, 7, 7]

While executing return [addmm, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_157, primals_158, primals_160, primals_161, primals_163, primals_164, primals_166, primals_167, primals_171, primals_172, primals_174, primals_175, primals_177, primals_178, primals_180, primals_181, primals_183, primals_184, primals_186, primals_187, primals_189, primals_190, primals_192, primals_193, primals_195, primals_196, primals_198, primals_199, primals_201, primals_202, primals_204, primals_205, primals_207, primals_208, primals_210, primals_211, primals_213, primals_214, primals_216, primals_217, primals_219, primals_220, primals_222, primals_223, primals_225, primals_226, primals_228, primals_229, primals_231, primals_232, primals_234, primals_235, primals_237, primals_238, primals_240, primals_241, primals_243, primals_244, primals_246, primals_247, primals_249, primals_250, primals_252, primals_253, primals_255, primals_256, primals_258, primals_259, primals_261, primals_262, primals_264, primals_265, primals_267, primals_268, primals_270, primals_271, primals_273, primals_274, primals_276, primals_277, primals_279, primals_280, primals_282, primals_283, primals_285, primals_286, primals_288, primals_289, primals_291, primals_292, primals_294, primals_295, primals_297, primals_298, primals_300, primals_301, primals_303, primals_304, primals_306, primals_307, primals_309, primals_310, primals_312, primals_313, primals_315, primals_316, primals_318, primals_319, primals_321, primals_322, primals_324, primals_325, primals_327, primals_328, primals_330, primals_331, primals_333, primals_334, primals_336, primals_337, primals_339, convolution, relu, getitem, getitem_1, convolution_1, add_3, convolution_2, convolution_3, relu_2, convolution_4, add_9, convolution_5, getitem_3, convolution_6, relu_4, convolution_7, add_15, convolution_8, getitem_5, convolution_9, relu_6, convolution_10, add_21, convolution_11, getitem_7, convolution_12, relu_8, convolution_13, add_27, convolution_14, view_7, convolution_15, add_31, convolution_16, convolution_17, relu_11, convolution_18, add_37, convolution_19, getitem_9, convolution_20, relu_13, convolution_21, add_43, convolution_22, getitem_11, convolution_23, relu_15, convolution_24, add_49, convolution_25, getitem_13, convolution_26, relu_17, convolution_27, add_55, convolution_28, getitem_15, convolution_29, relu_19, convolution_30, add_61, convolution_31, getitem_17, convolution_32, relu_21, convolution_33, add_67, convolution_34, getitem_19, convolution_35, relu_23, convolution_36, add_73, convolution_37, getitem_21, convolution_38, relu_25, convolution_39, add_79, convolution_40, view_23, convolution_41, add_83, convolution_42, convolution_43, relu_28, convolution_44, add_89, convolution_45, getitem_23, convolution_46, relu_30, convolution_47, add_95, convolution_48, getitem_25, convolution_49, relu_32, convolution_50, add_101, convolution_51, getitem_27, convolution_52, relu_34, convolution_53, add_107, convolution_54, view_31, convolution_55, mean, permute_17, le, le_1, le_3, le_5, le_7, le_9, le_10, le_12, le_14, le_16, le_18, le_20, le_22, le_24, le_26, le_27, le_29, le_31, le_33, le_35, sym_size, sym_size_1, sym_size_2, sym_size_3, sym_size_4, sym_size_5, sym_size_6]
ERROR:common:compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 472, in call_user_compiler
    assert callable(compiled_fn), "compiler_fn did not return callable"
AssertionError: compiler_fn did not return callable

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1679, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 386, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 432, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 475, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
cuda train shufflenet_v2_x1_0                 FAIL
/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/gym/core.py:317: DeprecationWarning: [33mWARN: Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.[0m
  deprecation(
/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/gym/wrappers/step_api_compatibility.py:39: DeprecationWarning: [33mWARN: Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.[0m
  deprecation(
Running torchbench.py soft_actor_critic...
[2022-11-19 15:03:12,625] torch._dynamo.utils: [ERROR] RMSE (res-fp64): 2.78075, (ref-fp64): 0.00225 and shape=torch.Size([1024, 3])
[2022-11-19 15:03:12,625] torch._dynamo.utils: [ERROR] Accuracy failed for key name fc1.weight.grad
cuda train soft_actor_critic                  FAIL
Running torchbench.py speech_transformer...
ERROR:common:compile_fx raised AssertionError: While executing %self_layer_stack_0_slf_attn_attention_temperature : torch.Tensor [#users=1] = placeholder[target=self_layer_stack_0_slf_attn_attention_temperature]

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 466, in call_user_compiler
    compiled_fn = self.compiler_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/debug_utils.py", line 865, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 345, in compile_fx
    if not is_aot_autograd_safe_to_run(model_, example_inputs_, fake_mode):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/training.py", line 64, in is_aot_autograd_safe_to_run
    mutated = has_mutation(gm, example_inputs, fake_mode, inputs_only=True)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/analysis.py", line 138, in has_mutation
    ShapeAliasingAndMutationProp(new_gm).run(*example_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/analysis.py", line 116, in run
    super().run(*args)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/analysis.py", line 50, in run_node
    result = getattr(self, n.op)(n.target, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/analysis.py", line 41, in placeholder
    assert isinstance(value, torch.Tensor)
AssertionError: While executing %self_layer_stack_0_slf_attn_attention_temperature : torch.Tensor [#users=1] = placeholder[target=self_layer_stack_0_slf_attn_attention_temperature]

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/models/speech_transformer/speech_transformer/transformer/transformer.py", line 28, in forward
    encoder_padded_outputs, *_ = self.encoder(padded_input, input_lengths)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/models/speech_transformer/speech_transformer/transformer/encoder.py", line 48, in forward
    non_pad_mask = get_non_pad_mask(padded_input, input_lengths=input_lengths)
  File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/models/speech_transformer/speech_transformer/transformer/encoder.py", line 50, in <graph break in forward>
    slf_attn_mask = get_attn_pad_mask(padded_input, input_lengths, length)
  File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/models/speech_transformer/speech_transformer/transformer/encoder.py", line 55, in <graph break in forward>
    self.positional_encoding(padded_input))
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1679, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 386, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 432, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 475, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: While executing %self_layer_stack_0_slf_attn_attention_temperature : torch.Tensor [#users=1] = placeholder[target=self_layer_stack_0_slf_attn_attention_temperature]

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
cuda train speech_transformer                 FAIL
Running torchbench.py squeezenet1_1...
[2022-11-19 15:03:39,197] torch._dynamo.optimizations.backends: [ERROR] aot_autograd error
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 53, in inner
    return fn(model, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 563, in aot_autograd
    return aot_module_simplified(subgraph.model, subgraph.example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1503, in aot_module_simplified
    aot_dispatcher_function = _create_aot_dispatcher_function(functional_call, joined_args, aot_config, fake_mode)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1191, in _create_aot_dispatcher_function
    aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 941, in aot_dispatch_autograd
    compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args_with_views_handled)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 364, in fw_compiler
    return compile_fx_inner(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/debug_utils.py", line 473, in debug_wrapper
    compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/debug.py", line 177, in inner
    return fn(*args, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/contextlib.py", line 79, in inner
    return func(*args, **kwds)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/compile_fx.py", line 122, in compile_fx_inner
    graph.run(*example_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 140, in run
    return super().run(*args)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 321, in run_node
    result = super().run_node(n)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/graph.py", line 282, in output
    assert all(
AssertionError: [TensorBox(
  View(
    StorageBox(
      Pointwise(
        'cuda',
        torch.float32,
        load(buf65, i1 + 1000 * i0) / index_expr(169, torch.float32),
        ranges=[2, 1000, 1, 1],
        origins={primals_51, convolution_25, cat_7, primals_52, mean, relu_25}
      )
    ),
    size=(2, 1000),
    reindex=lambda i0, i1: [i0, i1, 0, 0],
    origins={primals_51, convolution_25, cat_7, primals_52, mean, view, relu_25}
  )
), TensorBox(StorageBox(
  InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[64, 3, 3, 3], stride=[27, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_3', layout=FixedLayout('cuda', torch.float32, size=[16, 64, 1, 1], stride=[64, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_5', layout=FixedLayout('cuda', torch.float32, size=[64, 16, 1, 1], stride=[16, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[64, 16, 3, 3], stride=[144, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_9', layout=FixedLayout('cuda', torch.float32, size=[16, 128, 1, 1], stride=[128, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_11', layout=FixedLayout('cuda', torch.float32, size=[64, 16, 1, 1], stride=[16, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[64, 16, 3, 3], stride=[144, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_15', layout=FixedLayout('cuda', torch.float32, size=[32, 128, 1, 1], stride=[128, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_17', layout=FixedLayout('cuda', torch.float32, size=[128, 32, 1, 1], stride=[32, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[128, 32, 3, 3], stride=[288, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_21', layout=FixedLayout('cuda', torch.float32, size=[32, 256, 1, 1], stride=[256, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_23', layout=FixedLayout('cuda', torch.float32, size=[128, 32, 1, 1], stride=[32, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[128, 32, 3, 3], stride=[288, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_27', layout=FixedLayout('cuda', torch.float32, size=[48, 256, 1, 1], stride=[256, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_29', layout=FixedLayout('cuda', torch.float32, size=[192, 48, 1, 1], stride=[48, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[192, 48, 3, 3], stride=[432, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_33', layout=FixedLayout('cuda', torch.float32, size=[48, 384, 1, 1], stride=[384, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_35', layout=FixedLayout('cuda', torch.float32, size=[192, 48, 1, 1], stride=[48, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[192, 48, 3, 3], stride=[432, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_39', layout=FixedLayout('cuda', torch.float32, size=[64, 384, 1, 1], stride=[384, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_41', layout=FixedLayout('cuda', torch.float32, size=[256, 64, 1, 1], stride=[64, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[256, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_45', layout=FixedLayout('cuda', torch.float32, size=[64, 512, 1, 1], stride=[512, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_47', layout=FixedLayout('cuda', torch.float32, size=[256, 64, 1, 1], stride=[64, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[256, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_51', layout=FixedLayout('cuda', torch.float32, size=[1000, 512, 1, 1], stride=[512, 1, 1, 1]))
)), TensorBox(StorageBox(
  InputBuffer(name='primals_53', layout=FixedLayout('cuda', torch.float32, size=[s0, 3, s2, s2], stride=[3*s2**2, s2**2, s2, 1]))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf1', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 64, 111, 111]), stride=[788544, 12321, 111, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf0, i3 + 111 * i2 + 12321 * i1 + 788544 * i0) + load(primals_2, i1)),
    ranges=torch.Size([2, 64, 111, 111]),
    origins={primals_53, convolution, primals_2, relu, primals_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[2, 64, 55, 55], stride=[193600, 3025, 55, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    maximum(load(buf1, 224 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 223 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 222 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 113 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0))))))))),
    ranges=[2, 64, 55, 55],
    origins={primals_53, convolution, primals_2, max_pool2d_with_indices, relu, primals_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf3', layout=FlexibleLayout('cuda', torch.int64, size=[2, 64, 55, 55], stride=[193600, 3025, 55, 1]), data=Pointwise(
    'cuda',
    torch.int64,
    where(load(buf1, 224 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 223 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 222 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 113 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0)))))))), index_expr(224 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 223 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 222 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 113 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0))))))), index_expr(223 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 222 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 113 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0)))))), index_expr(222 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 113 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0))))), index_expr(113 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0)))), index_expr(112 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0))), index_expr(111 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0)), index_expr(2 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), index_expr(1 + 2 * i3 + 222 * i2, torch.int64), index_expr(2 * i3 + 222 * i2, torch.int64))))))))),
    ranges=[2, 64, 55, 55],
    origins={primals_53, convolution, primals_2, max_pool2d_with_indices, relu, primals_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf5', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 16, 55, 55]), stride=[48400, 3025, 55, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf4, i3 + 55 * i2 + 3025 * i1 + 48400 * i0) + load(primals_4, i1)),
    ranges=torch.Size([2, 16, 55, 55]),
    origins={primals_3, convolution, primals_4, relu, convolution_1, relu_1, primals_53, primals_2, max_pool2d_with_indices, primals_1}
  ))
)), TensorBox(StorageBox(
  ConcatKernel(name='buf10', layout=FixedLayout('cuda', torch.float32, size=[2, 128, 55, 55], stride=[387200, 3025, 55, 1]), inputs=[ComputedBuffer(name='buf8', layout=AliasedLayout('cuda', torch.float32, size=[2, 64, 55, 55], stride=[387200, 3025, 55, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf6, i3 + 55 * i2 + 3025 * i1 + 193600 * i0) + load(primals_6, i1)),
    ranges=torch.Size([2, 64, 55, 55]),
    origins={primals_3, relu_2, convolution, primals_4, relu, primals_6, convolution_1, relu_1, primals_53, convolution_2, primals_5, primals_2, max_pool2d_with_indices, primals_1}
  )), ComputedBuffer(name='buf9', layout=AliasedLayout('cuda', torch.float32, size=[2, 64, 55, 55], stride=[387200, 3025, 55, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf7, i3 + 55 * i2 + 3025 * i1 + 193600 * i0) + load(primals_8, i1)),
    ranges=torch.Size([2, 64, 55, 55]),
    origins={primals_7, primals_3, convolution, primals_4, relu, convolution_1, primals_8, relu_1, primals_53, relu_3, convolution_3, primals_2, max_pool2d_with_indices, primals_1}
  ))])
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf12', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 16, 55, 55]), stride=[48400, 3025, 55, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf11, i3 + 55 * i2 + 3025 * i1 + 48400 * i0) + load(primals_10, i1)),
    ranges=torch.Size([2, 16, 55, 55]),
    origins={cat, primals_9, convolution_4, relu_4, primals_10}
  ))
)), TensorBox(StorageBox(
  ConcatKernel(name='buf17', layout=FixedLayout('cuda', torch.float32, size=[2, 128, 55, 55], stride=[387200, 3025, 55, 1]), inputs=[ComputedBuffer(name='buf15', layout=AliasedLayout('cuda', torch.float32, size=[2, 64, 55, 55], stride=[387200, 3025, 55, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf13, i3 + 55 * i2 + 3025 * i1 + 193600 * i0) + load(primals_12, i1)),
    ranges=torch.Size([2, 64, 55, 55]),
    origins={cat, primals_9, primals_12, convolution_4, primals_10, relu_5, primals_11, relu_4, convolution_5}
  )), ComputedBuffer(name='buf16', layout=AliasedLayout('cuda', torch.float32, size=[2, 64, 55, 55], stride=[387200, 3025, 55, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf14, i3 + 55 * i2 + 3025 * i1 + 193600 * i0) + load(primals_14, i1)),
    ranges=torch.Size([2, 64, 55, 55]),
    origins={cat, primals_9, convolution_4, primals_10, primals_14, primals_13, relu_6, convolution_6, relu_4}
  ))])
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf18', layout=FixedLayout('cuda', torch.float32, size=[2, 128, 27, 27], stride=[93312, 729, 27, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    maximum(load(buf17, 112 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 111 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 110 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 57 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0))))))))),
    ranges=[2, 128, 27, 27],
    origins={cat_1, max_pool2d_with_indices_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf19', layout=FlexibleLayout('cuda', torch.int64, size=[2, 128, 27, 27], stride=[93312, 729, 27, 1]), data=Pointwise(
    'cuda',
    torch.int64,
    where(load(buf17, 112 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 111 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 110 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 57 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0)))))))), index_expr(112 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 111 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 110 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 57 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0))))))), index_expr(111 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 110 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 57 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0)))))), index_expr(110 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 57 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0))))), index_expr(57 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0)))), index_expr(56 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0))), index_expr(55 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0)), index_expr(2 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), index_expr(1 + 2 * i3 + 110 * i2, torch.int64), index_expr(2 * i3 + 110 * i2, torch.int64))))))))),
    ranges=[2, 128, 27, 27],
    origins={cat_1, max_pool2d_with_indices_1}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf21', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 32, 27, 27]), stride=[23328, 729, 27, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf20, i3 + 27 * i2 + 729 * i1 + 23328 * i0) + load(primals_16, i1)),
    ranges=torch.Size([2, 32, 27, 27]),
    origins={max_pool2d_with_indices_1, relu_7, convolution_7, primals_16, cat_1, primals_15}
  ))
)), TensorBox(StorageBox(
  ConcatKernel(name='buf26', layout=FixedLayout('cuda', torch.float32, size=[2, 256, 27, 27], stride=[186624, 729, 27, 1]), inputs=[ComputedBuffer(name='buf24', layout=AliasedLayout('cuda', torch.float32, size=[2, 128, 27, 27], stride=[186624, 729, 27, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf22, i3 + 27 * i2 + 729 * i1 + 93312 * i0) + load(primals_18, i1)),
    ranges=torch.Size([2, 128, 27, 27]),
    origins={convolution_8, max_pool2d_with_indices_1, relu_7, convolution_7, cat_1, primals_17, primals_18, primals_16, primals_15, relu_8}
  )), ComputedBuffer(name='buf25', layout=AliasedLayout('cuda', torch.float32, size=[2, 128, 27, 27], stride=[186624, 729, 27, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf23, i3 + 27 * i2 + 729 * i1 + 93312 * i0) + load(primals_20, i1)),
    ranges=torch.Size([2, 128, 27, 27]),
    origins={convolution_9, primals_19, max_pool2d_with_indices_1, relu_7, convolution_7, cat_1, primals_20, primals_16, relu_9, primals_15}
  ))])
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf28', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 32, 27, 27]), stride=[23328, 729, 27, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf27, i3 + 27 * i2 + 729 * i1 + 23328 * i0) + load(primals_22, i1)),
    ranges=torch.Size([2, 32, 27, 27]),
    origins={cat_2, primals_22, primals_21, convolution_10, relu_10}
  ))
)), TensorBox(StorageBox(
  ConcatKernel(name='buf33', layout=FixedLayout('cuda', torch.float32, size=[2, 256, 27, 27], stride=[186624, 729, 27, 1]), inputs=[ComputedBuffer(name='buf31', layout=AliasedLayout('cuda', torch.float32, size=[2, 128, 27, 27], stride=[186624, 729, 27, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf29, i3 + 27 * i2 + 729 * i1 + 93312 * i0) + load(primals_24, i1)),
    ranges=torch.Size([2, 128, 27, 27]),
    origins={cat_2, primals_22, primals_21, convolution_11, primals_24, convolution_10, relu_10, primals_23, relu_11}
  )), ComputedBuffer(name='buf32', layout=AliasedLayout('cuda', torch.float32, size=[2, 128, 27, 27], stride=[186624, 729, 27, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf30, i3 + 27 * i2 + 729 * i1 + 93312 * i0) + load(primals_26, i1)),
    ranges=torch.Size([2, 128, 27, 27]),
    origins={cat_2, primals_22, primals_21, primals_26, primals_25, relu_12, convolution_10, relu_10, convolution_12}
  ))])
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf34', layout=FixedLayout('cuda', torch.float32, size=[2, 256, 13, 13], stride=[43264, 169, 13, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    maximum(load(buf33, 56 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 55 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 54 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 29 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0))))))))),
    ranges=[2, 256, 13, 13],
    origins={max_pool2d_with_indices_2, cat_3}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf35', layout=FlexibleLayout('cuda', torch.int64, size=[2, 256, 13, 13], stride=[43264, 169, 13, 1]), data=Pointwise(
    'cuda',
    torch.int64,
    where(load(buf33, 56 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 55 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 54 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 29 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0)))))))), index_expr(56 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 55 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 54 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 29 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0))))))), index_expr(55 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 54 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 29 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0)))))), index_expr(54 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 29 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0))))), index_expr(29 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0)))), index_expr(28 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0))), index_expr(27 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0)), index_expr(2 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), index_expr(1 + 2 * i3 + 54 * i2, torch.int64), index_expr(2 * i3 + 54 * i2, torch.int64))))))))),
    ranges=[2, 256, 13, 13],
    origins={max_pool2d_with_indices_2, cat_3}
  ))
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf37', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 48, 13, 13]), stride=[8112, 169, 13, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf36, i3 + 13 * i2 + 169 * i1 + 8112 * i0) + load(primals_28, i1)),
    ranges=torch.Size([2, 48, 13, 13]),
    origins={max_pool2d_with_indices_2, convolution_13, cat_3, primals_27, primals_28, relu_13}
  ))
)), TensorBox(StorageBox(
  ConcatKernel(name='buf42', layout=FixedLayout('cuda', torch.float32, size=[2, 384, 13, 13], stride=[64896, 169, 13, 1]), inputs=[ComputedBuffer(name='buf40', layout=AliasedLayout('cuda', torch.float32, size=[2, 192, 13, 13], stride=[64896, 169, 13, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf38, i3 + 13 * i2 + 169 * i1 + 32448 * i0) + load(primals_30, i1)),
    ranges=torch.Size([2, 192, 13, 13]),
    origins={max_pool2d_with_indices_2, convolution_13, cat_3, primals_27, relu_14, primals_30, relu_13, primals_29, primals_28, convolution_14}
  )), ComputedBuffer(name='buf41', layout=AliasedLayout('cuda', torch.float32, size=[2, 192, 13, 13], stride=[64896, 169, 13, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf39, i3 + 13 * i2 + 169 * i1 + 32448 * i0) + load(primals_32, i1)),
    ranges=torch.Size([2, 192, 13, 13]),
    origins={max_pool2d_with_indices_2, convolution_13, cat_3, primals_27, relu_13, relu_15, primals_28, primals_31, primals_32, convolution_15}
  ))])
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf44', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 48, 13, 13]), stride=[8112, 169, 13, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf43, i3 + 13 * i2 + 169 * i1 + 8112 * i0) + load(primals_34, i1)),
    ranges=torch.Size([2, 48, 13, 13]),
    origins={convolution_16, cat_4, relu_16, primals_34, primals_33}
  ))
)), TensorBox(StorageBox(
  ConcatKernel(name='buf49', layout=FixedLayout('cuda', torch.float32, size=[2, 384, 13, 13], stride=[64896, 169, 13, 1]), inputs=[ComputedBuffer(name='buf47', layout=AliasedLayout('cuda', torch.float32, size=[2, 192, 13, 13], stride=[64896, 169, 13, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf45, i3 + 13 * i2 + 169 * i1 + 32448 * i0) + load(primals_36, i1)),
    ranges=torch.Size([2, 192, 13, 13]),
    origins={convolution_16, primals_35, relu_17, convolution_17, primals_34, cat_4, relu_16, primals_36, primals_33}
  )), ComputedBuffer(name='buf48', layout=AliasedLayout('cuda', torch.float32, size=[2, 192, 13, 13], stride=[64896, 169, 13, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf46, i3 + 13 * i2 + 169 * i1 + 32448 * i0) + load(primals_38, i1)),
    ranges=torch.Size([2, 192, 13, 13]),
    origins={convolution_16, primals_37, primals_34, primals_38, cat_4, relu_18, relu_16, convolution_18, primals_33}
  ))])
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf51', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 64, 13, 13]), stride=[10816, 169, 13, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf50, i3 + 13 * i2 + 169 * i1 + 10816 * i0) + load(primals_40, i1)),
    ranges=torch.Size([2, 64, 13, 13]),
    origins={primals_39, relu_19, cat_5, primals_40, convolution_19}
  ))
)), TensorBox(StorageBox(
  ConcatKernel(name='buf56', layout=FixedLayout('cuda', torch.float32, size=[2, 512, 13, 13], stride=[86528, 169, 13, 1]), inputs=[ComputedBuffer(name='buf54', layout=AliasedLayout('cuda', torch.float32, size=[2, 256, 13, 13], stride=[86528, 169, 13, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf52, i3 + 13 * i2 + 169 * i1 + 43264 * i0) + load(primals_42, i1)),
    ranges=torch.Size([2, 256, 13, 13]),
    origins={relu_19, convolution_20, cat_5, primals_40, primals_39, relu_20, primals_42, primals_41, convolution_19}
  )), ComputedBuffer(name='buf55', layout=AliasedLayout('cuda', torch.float32, size=[2, 256, 13, 13], stride=[86528, 169, 13, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf53, i3 + 13 * i2 + 169 * i1 + 43264 * i0) + load(primals_44, i1)),
    ranges=torch.Size([2, 256, 13, 13]),
    origins={relu_19, relu_21, cat_5, primals_40, primals_44, primals_43, primals_39, convolution_21, convolution_19}
  ))])
)), TensorBox(StorageBox(
  ComputedBuffer(name='buf58', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 64, 13, 13]), stride=[10816, 169, 13, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf57, i3 + 13 * i2 + 169 * i1 + 10816 * i0) + load(primals_46, i1)),
    ranges=torch.Size([2, 64, 13, 13]),
    origins={cat_6, primals_45, convolution_22, relu_22, primals_46}
  ))
)), TensorBox(StorageBox(
  ConcatKernel(name='buf63', layout=FixedLayout('cuda', torch.float32, size=[2, 512, 13, 13], stride=[86528, 169, 13, 1]), inputs=[ComputedBuffer(name='buf61', layout=AliasedLayout('cuda', torch.float32, size=[2, 256, 13, 13], stride=[86528, 169, 13, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf59, i3 + 13 * i2 + 169 * i1 + 43264 * i0) + load(primals_48, i1)),
    ranges=torch.Size([2, 256, 13, 13]),
    origins={cat_6, relu_23, primals_45, convolution_22, primals_48, primals_47, relu_22, primals_46, convolution_23}
  )), ComputedBuffer(name='buf62', layout=AliasedLayout('cuda', torch.float32, size=[2, 256, 13, 13], stride=[86528, 169, 13, 1]), data=Pointwise(
    'cuda',
    torch.float32,
    relu(load(buf60, i3 + 13 * i2 + 169 * i1 + 43264 * i0) + load(primals_50, i1)),
    ranges=torch.Size([2, 256, 13, 13]),
    origins={cat_6, primals_50, primals_45, convolution_24, convolution_22, primals_49, relu_24, relu_22, primals_46}
  ))])
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    relu(load(buf64, i3 + 13 * i2 + 169 * i1 + 169000 * i0) + load(primals_52, i1)) <= constant(0, torch.float32),
    ranges=torch.Size([2, 1000, 13, 13]),
    origins={primals_51, convolution_25, cat_7, primals_52, le, relu_25}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf62, i3 + 13 * i2 + 169 * i1 + 86528 * i0) <= constant(0, torch.float32),
    ranges=[2, 256, 13, 13],
    origins={cat_6, primals_50, primals_45, convolution_24, convolution_22, primals_49, relu_24, le_1, relu_22, primals_46}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf61, i3 + 13 * i2 + 169 * i1 + 86528 * i0) <= constant(0, torch.float32),
    ranges=[2, 256, 13, 13],
    origins={le_2, cat_6, relu_23, primals_45, convolution_22, primals_48, primals_47, relu_22, primals_46, convolution_23}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf55, i3 + 13 * i2 + 169 * i1 + 86528 * i0) <= constant(0, torch.float32),
    ranges=[2, 256, 13, 13],
    origins={relu_19, relu_21, cat_5, primals_40, primals_44, primals_43, primals_39, convolution_21, convolution_19, le_4}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf54, i3 + 13 * i2 + 169 * i1 + 86528 * i0) <= constant(0, torch.float32),
    ranges=[2, 256, 13, 13],
    origins={relu_19, convolution_20, cat_5, primals_40, le_5, primals_39, relu_20, primals_42, primals_41, convolution_19}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf48, i3 + 13 * i2 + 169 * i1 + 64896 * i0) <= constant(0, torch.float32),
    ranges=[2, 192, 13, 13],
    origins={convolution_16, primals_37, le_7, primals_34, cat_4, primals_38, relu_18, relu_16, convolution_18, primals_33}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf47, i3 + 13 * i2 + 169 * i1 + 64896 * i0) <= constant(0, torch.float32),
    ranges=[2, 192, 13, 13],
    origins={convolution_16, primals_35, relu_17, convolution_17, le_8, primals_34, cat_4, relu_16, primals_36, primals_33}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf41, i3 + 13 * i2 + 169 * i1 + 64896 * i0) <= constant(0, torch.float32),
    ranges=[2, 192, 13, 13],
    origins={max_pool2d_with_indices_2, convolution_13, cat_3, primals_27, relu_13, relu_15, le_10, primals_28, primals_31, primals_32, convolution_15}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf40, i3 + 13 * i2 + 169 * i1 + 64896 * i0) <= constant(0, torch.float32),
    ranges=[2, 192, 13, 13],
    origins={max_pool2d_with_indices_2, convolution_13, cat_3, primals_27, relu_14, primals_30, le_11, relu_13, primals_29, primals_28, convolution_14}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf32, i3 + 27 * i2 + 729 * i1 + 186624 * i0) <= constant(0, torch.float32),
    ranges=[2, 128, 27, 27],
    origins={cat_2, primals_22, primals_21, primals_26, le_13, primals_25, relu_12, convolution_10, relu_10, convolution_12}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf31, i3 + 27 * i2 + 729 * i1 + 186624 * i0) <= constant(0, torch.float32),
    ranges=[2, 128, 27, 27],
    origins={cat_2, primals_22, primals_21, convolution_11, le_14, primals_24, convolution_10, relu_10, primals_23, relu_11}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf25, i3 + 27 * i2 + 729 * i1 + 186624 * i0) <= constant(0, torch.float32),
    ranges=[2, 128, 27, 27],
    origins={convolution_9, primals_19, max_pool2d_with_indices_1, relu_7, convolution_7, cat_1, primals_20, le_16, primals_16, relu_9, primals_15}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf24, i3 + 27 * i2 + 729 * i1 + 186624 * i0) <= constant(0, torch.float32),
    ranges=[2, 128, 27, 27],
    origins={convolution_8, max_pool2d_with_indices_1, relu_7, le_17, convolution_7, cat_1, primals_17, primals_18, primals_16, primals_15, relu_8}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf16, i3 + 55 * i2 + 3025 * i1 + 387200 * i0) <= constant(0, torch.float32),
    ranges=[2, 64, 55, 55],
    origins={cat, primals_9, relu_4, convolution_4, primals_10, primals_14, primals_13, relu_6, convolution_6, le_19}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf15, i3 + 55 * i2 + 3025 * i1 + 387200 * i0) <= constant(0, torch.float32),
    ranges=[2, 64, 55, 55],
    origins={cat, primals_9, primals_12, le_20, primals_10, convolution_4, relu_5, primals_11, relu_4, convolution_5}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf9, i3 + 55 * i2 + 3025 * i1 + 387200 * i0) <= constant(0, torch.float32),
    ranges=[2, 64, 55, 55],
    origins={primals_7, primals_3, convolution, primals_4, le_22, relu, convolution_1, primals_8, relu_1, primals_53, relu_3, convolution_3, primals_2, max_pool2d_with_indices, primals_1}
  )
)), TensorBox(StorageBox(
  Pointwise(
    'cuda',
    torch.bool,
    load(buf8, i3 + 55 * i2 + 3025 * i1 + 387200 * i0) <= constant(0, torch.float32),
    ranges=[2, 64, 55, 55],
    origins={max_pool2d_with_indices, primals_3, relu_2, convolution, primals_4, relu, primals_6, convolution_1, relu_1, primals_53, convolution_2, primals_5, primals_2, le_23, primals_1}
  )
)), s0, 13, 13]

While executing return [view, primals_1, primals_3, primals_5, primals_7, primals_9, primals_11, primals_13, primals_15, primals_17, primals_19, primals_21, primals_23, primals_25, primals_27, primals_29, primals_31, primals_33, primals_35, primals_37, primals_39, primals_41, primals_43, primals_45, primals_47, primals_49, primals_51, primals_53, relu, getitem, getitem_1, relu_1, cat, relu_4, cat_1, getitem_2, getitem_3, relu_7, cat_2, relu_10, cat_3, getitem_4, getitem_5, relu_13, cat_4, relu_16, cat_5, relu_19, cat_6, relu_22, cat_7, le, le_1, le_2, le_4, le_5, le_7, le_8, le_10, le_11, le_13, le_14, le_16, le_17, le_19, le_20, le_22, le_23, sym_size, sym_size_1, sym_size_2]
ERROR:common:compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 472, in call_user_compiler
    assert callable(compiled_fn), "compiler_fn did not return callable"
AssertionError: compiler_fn did not return callable

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1679, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 386, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 432, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 475, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
cuda train squeezenet1_1                      FAIL
Running torchbench.py tacotron2...
[2022-11-19 15:04:02,480] torch._dynamo.optimizations.backends: [ERROR] aot_autograd error
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 53, in inner
    return fn(model, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/optimizations/backends.py", line 563, in aot_autograd
    return aot_module_simplified(subgraph.model, subgraph.example_inputs, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1503, in aot_module_simplified
    aot_dispatcher_function = _create_aot_dispatcher_function(functional_call, joined_args, aot_config, fake_mode)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1191, in _create_aot_dispatcher_function
    aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 851, in aot_dispatch_autograd
    _fw_metadata, out = run_functionalized_fw_and_collect_metadata(
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 225, in inner
    outs = f(*f_args)
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 852, in <lambda>
    lambda *args: flat_fn(*(add_dupe_args(args))),
  File "/scratch/ezyang/work/b/pytorch/functorch/_src/aot_autograd.py", line 1479, in functional_call
    out = Interpreter(mod).run(*args[params_len:], **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/fx/interpreter.py", line 243, in call_function
    return target(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_inductor/overrides.py", line 951, in lowmem_dropout
    result = LowmemDropout.apply(input, p)
RuntimeError: Cannot call sizes() on tensor with symbolic sizes/strides

While executing %lowmem_dropout_2 : [#users=1] = call_function[target=torch._inductor.overrides.lowmem_dropout](args = (%relu : META IS MISSING, INVESTIGATE, 0.5, True), kwargs = {})
ERROR:common:compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 472, in call_user_compiler
    assert callable(compiled_fn), "compiler_fn did not return callable"
AssertionError: compiler_fn did not return callable

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/torchbenchmark/torchbenchmark/models/tacotron2/model.py", line 505, in forward
    encoder_outputs = self.encoder(embedded_inputs, text_lengths)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 314, in wrapper
    self.output.compile_subgraph(self, reason=reason)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 386, in compile_subgraph
    self.compile_and_call_fx_graph(tx, pass2.graph_output_vars(), root)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 432, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/output_graph.py", line 475, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised AssertionError: compiler_fn did not return callable

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

TorchDynamo optimized model failed to run because of following error
cuda train tacotron2                          FAIL
Running torchbench.py timm_efficientdet...
ERROR:common:

from user code:
   File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/effdet/efficientdet.py", line 211, in forward
    input_node = resample(input_node)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/effdet/efficientdet.py", line 134, in forward
    return F.interpolate(

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1087, in run_node
    return node.target(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/functional.py", line 3930, in interpolate
    return torch._C._nn.upsample_nearest2d(input, output_size, scale_factors)
RuntimeError: Cannot call sizes() on tensor with symbolic sizes/strides

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1053, in get_fake_value
    return wrap_fake_exception(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 737, in wrap_fake_exception
    return fn()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1054, in <lambda>
    lambda: run_node(tx.output, node, args, kwargs, nnmodule)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1096, in run_node
    raise RuntimeError(
RuntimeError: Failed running call_function <function interpolate at 0x7ff8de5d5000>(*(FakeTensor(FakeTensor(..., device='meta', size=(s0, s6, s8, s8), grad_fn=<CloneBackward0>), cuda:0), (10, 10), None, 'nearest', None), **{'recompute_scale_factor': False}):
Cannot call sizes() on tensor with symbolic sizes/strides
(scroll up for backtrace)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1157, in check_accuracy
    new_result = optimized_model_iter_fn(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 174, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/common.py", line 1055, in run_n_iterations
    self.model_iter_fn(mod, inputs, collect_outputs=False)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 333, in forward_and_backward_pass
    cloned_inputs = clone_inputs(inputs)
  File "/scratch/ezyang/work/b/pytorch/benchmarks/dynamo/torchbench.py", line 336, in <graph break in forward_and_backward_pass>
    pred = mod(*cloned_inputs)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/effdet/bench.py", line 133, in forward
    class_out, box_out = self.model(x)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/effdet/efficientdet.py", line 602, in forward
    x = self.backbone(x)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/effdet/efficientdet.py", line 603, in <graph break in forward>
    x = self.fpn(x)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/effdet/efficientdet.py", line 346, in forward
    x = self.cell(x)
  File "/scratch/ezyang/work/b/pytorch/torch/nn/modules/module.py", line 1427, in _call_impl
    return forward_call(*input, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/eval_frame.py", line 286, in catch_errors
    return callback(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 476, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 118, in _fn
    return fn(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 349, in _convert_frame_assert
    return _compile(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 404, in _compile
    out_code = transform_code_object(code, transform)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/convert_frame.py", line 392, in transform
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1617, in run
    super().run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 287, in wrapper
    return inner_fn(self, inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 910, in CALL_FUNCTION
    self.call_function(fn, args, {})
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 395, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/nn_module.py", line 222, in call_function
    return tx.inline_user_function_return(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 424, in inline_user_function_return
    result = InliningInstructionTranslator.inline_call(self, fn, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1689, in inline_call
    return cls.inline_call_(parent, func, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1743, in inline_call_
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 287, in wrapper
    return inner_fn(self, inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 910, in CALL_FUNCTION
    self.call_function(fn, args, {})
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 395, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/nn_module.py", line 222, in call_function
    return tx.inline_user_function_return(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 424, in inline_user_function_return
    result = InliningInstructionTranslator.inline_call(self, fn, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1689, in inline_call
    return cls.inline_call_(parent, func, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1743, in inline_call_
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 287, in wrapper
    return inner_fn(self, inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 910, in CALL_FUNCTION
    self.call_function(fn, args, {})
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 395, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/nn_module.py", line 222, in call_function
    return tx.inline_user_function_return(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 424, in inline_user_function_return
    result = InliningInstructionTranslator.inline_call(self, fn, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1689, in inline_call
    return cls.inline_call_(parent, func, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1743, in inline_call_
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 287, in wrapper
    return inner_fn(self, inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 910, in CALL_FUNCTION
    self.call_function(fn, args, {})
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 395, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/nn_module.py", line 183, in call_function
    tx.call_function(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 395, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/nn_module.py", line 222, in call_function
    return tx.inline_user_function_return(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 424, in inline_user_function_return
    result = InliningInstructionTranslator.inline_call(self, fn, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1689, in inline_call
    return cls.inline_call_(parent, func, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 1743, in inline_call_
    tracer.run()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 483, in run
    and self.step()
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 453, in step
    getattr(self, inst.opname)(inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 287, in wrapper
    return inner_fn(self, inst)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 959, in CALL_FUNCTION_KW
    self.call_function(fn, args, kwargs)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/symbolic_convert.py", line 395, in call_function
    self.push(fn.call_function(self, args, kwargs))
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/torch.py", line 417, in call_function
    tensor_variable = wrap_fx_proxy(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/builder.py", line 650, in wrap_fx_proxy
    return wrap_fx_proxy_cls(
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/variables/builder.py", line 691, in wrap_fx_proxy_cls
    example_value = get_fake_value(proxy.node, tx)
  File "/scratch/ezyang/work/b/pytorch/torch/_dynamo/utils.py", line 1066, in get_fake_value
    raise TorchRuntimeError() from e
torch._dynamo.exc.TorchRuntimeError:

from user code:
   File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/effdet/efficientdet.py", line 211, in forward
    input_node = resample(input_node)
  File "/data/home/ezyang/local/b/pytorch-env/lib/python3.10/site-packages/effdet/efficientdet.py", line 134, in forward
    return F.interpolate(

Set torch._dynamo.config.verbose=True for more information


You can suppress this exception and fall back to eager by setting:
    torch._dynamo.