Skip to content

Instantly share code, notes, and snippets.

@ezyang
Created November 6, 2022 06:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ezyang/4379d89528bd61a0fdb31062e402fb63 to your computer and use it in GitHub Desktop.
Save ezyang/4379d89528bd61a0fdb31062e402fb63 to your computer and use it in GitHub Desktop.
Sweep logs for symbolic-shapes --accuracy --backend inductor --training (TORCHDYNAMO_DYNAMIC_SHAPES=1) - 1f5fac1d10df2e4a054740abc92bcf9d6a6553eb Sun Nov 6 02:47:57 UTC 2022
This file has been truncated, but you can view the full file.
Running torchbench.py BERT_pytorch...
ERROR:common:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/BERT_pytorch/bert_pytorch/model/bert.py", line 43, in forward
x = self.embedding(x, segment_info)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/BERT_pytorch/bert_pytorch/model/embedding/bert.py", line 32, in forward
x = self.token(sequence) + self.position(sequence) + self.segment(segment_label)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/BERT_pytorch/bert_pytorch/model/embedding/bert.py", line 32, in <graph break in forward>
x = self.token(sequence) + self.position(sequence) + self.segment(segment_label)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 516, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 351, in fw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 122, in compile_fx_inner
compiled_fn = graph.compile_to_fn()
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 349, in compile_to_fn
return self.compile_to_module().call
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 335, in compile_to_module
code = self.codegen()
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 326, in codegen
self.wrapper_code = WrapperCodeGen()
File "/scratch/ezyang/work/pytorch/torch/_inductor/codegen/wrapper.py", line 240, in __init__
V.graph.sizevars.codegen(self.prefix, V.graph.graph_inputs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/sizevars.py", line 481, in codegen
assert not needed
AssertionError
TorchDynamo optimized model failed to run because of following error
cuda train BERT_pytorch FAIL
Running torchbench.py Background_Matting...
[2022-11-06 02:49:25,066] torch._inductor.graph: [WARNING] Creating implicit fallback for:
target: <built-in function sub>
args[0]: 128
args[1]: 1
ERROR:common:TypeError: sub expected 2 arguments, got 0
target: <built-in function sub>
args[0]: 128
args[1]: 1
While executing %sub_36 : [#users=1] = call_function[target=operator.sub](args = (%sym_size, 1), kwargs = {})
Original traceback:
None
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 239, in call_function
out = lowerings[target](*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 211, in wrapped
return decomp_fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 968, in handler
result = ir.FallbackKernel.create(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2918, in create
) = cls.process_kernel(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2297, in process_kernel
example_output = kernel(*new_args, **new_kwargs)
TypeError: sub expected 2 arguments, got 0
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/Background_Matting/networks.py", line 91, in forward
def forward(self, image,back,seg,multi):
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 516, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 351, in fw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 242, in call_function
raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: TypeError: sub expected 2 arguments, got 0
target: <built-in function sub>
args[0]: 128
args[1]: 1
While executing %sub_36 : [#users=1] = call_function[target=operator.sub](args = (%sym_size, 1), kwargs = {})
Original traceback:
None
TorchDynamo optimized model failed to run because of following error
cuda train Background_Matting FAIL
sampling loop time step: 0%| | 0/1 [00:00<?, ?it/s] sampling loop time step: 100%|██████████| 1/1 [00:00<00:00, 66.35it/s]
0it [00:00, ?it/s]
sampling loop time step: 0%| | 0/1 [00:00<?, ?it/s]
sampling loop time step: 100%|██████████| 1/1 [00:01<00:00, 1.21s/it] sampling loop time step: 100%|██████████| 1/1 [00:01<00:00, 1.21s/it]
1it [00:02, 2.03s/it]
sampling loop time step: 0%| | 0/1 [00:00<?, ?it/s]
sampling loop time step: 100%|██████████| 1/1 [00:00<00:00, 5.75it/s] sampling loop time step: 100%|██████████| 1/1 [00:00<00:00, 5.75it/s]
2it [00:03, 1.41s/it] 2it [00:03, 1.50s/it]
WARNING:root:DALLE2_pytorch failed to load
Eager model failed to run
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 978, in validate_model
self.model_iter_fn(model, example_inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 337, in forward_and_backward_pass
self.grad_scaler.scale(loss).backward()
File "/scratch/ezyang/work/pytorch/torch/_tensor.py", line 450, in backward
torch.autograd.backward(
File "/scratch/ezyang/work/pytorch/torch/autograd/__init__.py", line 197, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1877, in run
device, name, model, example_inputs, batch_size = runner.load_model(
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 282, in load_model
self.validate_model(model, example_inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 980, in validate_model
raise NotImplementedError("Eager model failed to run")
NotImplementedError: Eager model failed to run
Running torchbench.py LearningToPaint...
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/LearningToPaint/baseline/DRL/actor.py", line 104, in forward
def forward(self, x):
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 945, in new_func
return compiled_fn(args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 321, in g
return f(*args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 573, in compiled_function
return CompiledFunction.apply(*remove_dupe_args(args))
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 528, in forward
fw_outs = call_func_with_args(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 346, in call_func_with_args
out = normalize_as_list(f(args))
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 185, in run
return model(new_inputs)
File "/tmp/torchinductor_ezyang/77/c77macsht3xuqk2mqrn3rco3ortepqqfzdokmrlcme5oggulu75o.py", line 744, in call
return (buf44, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_66, primals_67, primals_69, primals_70, primals_72, primals_73, primals_75, primals_76, primals_78, primals_79, primals_81, primals_82, primals_84, primals_85, primals_87, primals_88, primals_90, primals_91, primals_93, primals_94, primals_96, primals_97, primals_99, primals_100, primals_102, primals_103, primals_105, primals_106, primals_108, primals_109, primals_111, primals_112, primals_114, primals_115, primals_117, primals_118, primals_120, primals_121, primals_123, primals_124, primals_126, primals_127, primals_129, buf0, buf1, buf2, buf3, buf4, buf5, buf7, buf8, buf9, buf10, buf11, buf12, buf13, buf14, buf15, buf17, buf18, buf19, buf20, buf21, buf22, buf23, buf24, buf25, buf27, buf28, buf29, buf30, buf31, buf32, buf33, buf34, buf35, buf37, buf38, buf39, buf40, buf41, buf42, buf44, as_strided(primals_64, (65, 512), (512, 1)), s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
cuda train LearningToPaint FAIL
Running torchbench.py Super_SloMo...
ERROR:common:Failed running call_function <function grid_sample at 0x7fa17a2ae040>(*(FakeTensor(FakeTensor(..., device='meta', size=(s0, s1, s2, s2)), cuda:0), FakeTensor(FakeTensor(..., device='meta',
size=(s0, -127.0*s1 + 32.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s7 + (s2 - 2*s5 + 12)//2 + 9)//2 + 5)//2 + 5)//2 + 5)//2 + 413.0, -127.0*s1 + 32.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s7 + (s2 - 2*s5 + 12)//2 + 9)//2 + 5)//2 + 5)//2 + 5)//2 + 413.0, 2),
grad_fn=<StackBackward0>), cuda:0)), **{}):
TypeError: cannot determine truth value of Relational
At:
/scratch/ezyang/work/env/lib/python3.9/site-packages/sympy/core/relational.py(511): __bool__
/scratch/ezyang/work/pytorch/torch/fx/experimental/symbolic_shapes.py(203): bool_
/scratch/ezyang/work/pytorch/torch/nn/functional.py(4239): grid_sample
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py(52): _run_node
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py(131): <lambda>
/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py(709): wrap_fake_exception
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py(130): _get_fake_value
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py(199): create
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/torch.py(408): call_function
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(271): call_function
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(786): CALL_FUNCTION
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(181): wrapper
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(329): step
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(359): run
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(1620): inline_call_
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(1566): inline_call
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(300): inline_user_function_return
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/nn_module.py(221): call_function
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(271): call_function
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(786): CALL_FUNCTION
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(181): wrapper
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(329): step
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(359): run
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(1494): run
/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py(384): transform
/scratch/ezyang/work/pytorch/torch/_dynamo/bytecode_transformation.py(341): transform_code_object
/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py(396): _compile
/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py(341): _convert_frame_assert
/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py(87): time_wrapper
/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py(112): _fn
/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py(459): _convert_frame
/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py(251): catch_errors
/scratch/ezyang/work/pytorch/torch/nn/modules/module.py(1423): _call_impl
/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py(335): <graph break in forward_and_backward_pass>
/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py(332): forward_and_backward_pass
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(1020): run_n_iterations
/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py(173): _fn
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(1122): check_accuracy
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(1297): run_one_model
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(1906): run
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(775): inner
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(1580): main
/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py(349): <module>
(scroll up for backtrace)
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py", line 52, in _run_node
return node.target(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/nn/functional.py", line 4239, in grid_sample
return torch.grid_sampler(input, grid, mode_enum, padding_mode_enum, align_corners)
RuntimeError: TypeError: cannot determine truth value of Relational
At:
/scratch/ezyang/work/env/lib/python3.9/site-packages/sympy/core/relational.py(511): __bool__
/scratch/ezyang/work/pytorch/torch/fx/experimental/symbolic_shapes.py(203): bool_
/scratch/ezyang/work/pytorch/torch/nn/functional.py(4239): grid_sample
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py(52): _run_node
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py(131): <lambda>
/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py(709): wrap_fake_exception
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py(130): _get_fake_value
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py(199): create
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/torch.py(408): call_function
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(271): call_function
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(786): CALL_FUNCTION
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(181): wrapper
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(329): step
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(359): run
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(1620): inline_call_
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(1566): inline_call
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(300): inline_user_function_return
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/nn_module.py(221): call_function
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(271): call_function
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(786): CALL_FUNCTION
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(181): wrapper
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(329): step
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(359): run
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(1494): run
/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py(384): transform
/scratch/ezyang/work/pytorch/torch/_dynamo/bytecode_transformation.py(341): transform_code_object
/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py(396): _compile
/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py(341): _convert_frame_assert
/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py(87): time_wrapper
/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py(112): _fn
/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py(459): _convert_frame
/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py(251): catch_errors
/scratch/ezyang/work/pytorch/torch/nn/modules/module.py(1423): _call_impl
/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py(335): <graph break in forward_and_backward_pass>
/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py(332): forward_and_backward_pass
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(1020): run_n_iterations
/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py(173): _fn
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(1122): check_accuracy
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(1297): run_one_model
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(1906): run
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(775): inner
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(1580): main
/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py(349): <module>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 251, in catch_errors
return callback(frame, cache_size)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py", line 459, in _convert_frame
result = inner_convert(frame, cache_size)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py", line 112, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py", line 341, in _convert_frame_assert
return _compile(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py", line 396, in _compile
out_code = transform_code_object(code, transform)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
transformations(instructions, code_options)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py", line 384, in transform
tracer.run()
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 1494, in run
super().run()
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 359, in run
and self.step()
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 329, in step
getattr(self, inst.opname)(inst)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 181, in wrapper
return inner_fn(self, inst)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 786, in CALL_FUNCTION
self.call_function(fn, args, {})
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 271, in call_function
self.push(fn.call_function(self, args, kwargs))
File "/scratch/ezyang/work/pytorch/torch/_dynamo/variables/nn_module.py", line 221, in call_function
return tx.inline_user_function_return(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 300, in inline_user_function_return
result = InliningInstructionTranslator.inline_call(self, fn, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 1566, in inline_call
return cls.inline_call_(parent, func, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 1620, in inline_call_
tracer.run()
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 359, in run
and self.step()
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 329, in step
getattr(self, inst.opname)(inst)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 181, in wrapper
return inner_fn(self, inst)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 786, in CALL_FUNCTION
self.call_function(fn, args, {})
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 271, in call_function
self.push(fn.call_function(self, args, kwargs))
File "/scratch/ezyang/work/pytorch/torch/_dynamo/variables/torch.py", line 408, in call_function
tensor_variable = TensorVariable.create(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py", line 199, in create
example_value = _get_fake_value(proxy.node, tx)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py", line 130, in _get_fake_value
return wrap_fake_exception(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 709, in wrap_fake_exception
return fn()
File "/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py", line 131, in <lambda>
lambda: _run_node(tx.output, node, args, kwargs, nnmodule)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py", line 61, in _run_node
raise RuntimeError(
RuntimeError: Failed running call_function <function grid_sample at 0x7fa17a2ae040>(*(FakeTensor(FakeTensor(..., device='meta', size=(s0, s1, s2, s2)), cuda:0), FakeTensor(FakeTensor(..., device='meta',
size=(s0, -127.0*s1 + 32.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s7 + (s2 - 2*s5 + 12)//2 + 9)//2 + 5)//2 + 5)//2 + 5)//2 + 413.0, -127.0*s1 + 32.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s7 + (s2 - 2*s5 + 12)//2 + 9)//2 + 5)//2 + 5)//2 + 5)//2 + 413.0, 2),
grad_fn=<StackBackward0>), cuda:0)), **{}):
TypeError: cannot determine truth value of Relational
At:
/scratch/ezyang/work/env/lib/python3.9/site-packages/sympy/core/relational.py(511): __bool__
/scratch/ezyang/work/pytorch/torch/fx/experimental/symbolic_shapes.py(203): bool_
/scratch/ezyang/work/pytorch/torch/nn/functional.py(4239): grid_sample
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py(52): _run_node
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py(131): <lambda>
/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py(709): wrap_fake_exception
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py(130): _get_fake_value
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py(199): create
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/torch.py(408): call_function
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(271): call_function
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(786): CALL_FUNCTION
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(181): wrapper
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(329): step
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(359): run
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(1620): inline_call_
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(1566): inline_call
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(300): inline_user_function_return
/scratch/ezyang/work/pytorch/torch/_dynamo/variables/nn_module.py(221): call_function
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(271): call_function
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(786): CALL_FUNCTION
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(181): wrapper
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(329): step
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(359): run
/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py(1494): run
/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py(384): transform
/scratch/ezyang/work/pytorch/torch/_dynamo/bytecode_transformation.py(341): transform_code_object
/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py(396): _compile
/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py(341): _convert_frame_assert
/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py(87): time_wrapper
/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py(112): _fn
/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py(459): _convert_frame
/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py(251): catch_errors
/scratch/ezyang/work/pytorch/torch/nn/modules/module.py(1423): _call_impl
/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py(335): <graph break in forward_and_backward_pass>
/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py(332): forward_and_backward_pass
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(1020): run_n_iterations
/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py(173): _fn
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(1122): check_accuracy
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(1297): run_one_model
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(1906): run
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(775): inner
/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py(1580): main
/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py(349): <module>
(scroll up for backtrace)
TorchDynamo optimized model failed to run because of following error
cuda train Super_SloMo FAIL
Running torchbench.py alexnet...
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchvision/torchvision/models/alexnet.py", line 47, in forward
def forward(self, x: torch.Tensor) -> torch.Tensor:
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 945, in new_func
return compiled_fn(args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 321, in g
return f(*args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 573, in compiled_function
return CompiledFunction.apply(*remove_dupe_args(args))
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 528, in forward
fw_outs = call_func_with_args(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 346, in call_func_with_args
out = normalize_as_list(f(args))
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 185, in run
return model(new_inputs)
File "/tmp/torchinductor_ezyang/2x/c2x3p4um7day4xf4pwlhqsjbapihaurcsnxh5vlluu6k2i5ltfux.py", line 396, in call
return (buf21, primals_1, primals_3, primals_5, primals_7, primals_9, primals_17, buf1, buf2, buf3, buf5, buf6, buf7, buf9, buf11, buf13, buf14, buf15, as_strided(buf16, (2, 9216), (9216, 1)), buf18, buf20, as_strided(primals_15, (1000, 4096), (4096, 1)), as_strided(primals_13, (4096, 4096), (4096, 1)), as_strided(primals_11, (4096, 9216), (9216, 1)), s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
cuda train alexnet FAIL
Running torchbench.py attention_is_all_you_need_pytorch...
ERROR:common:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/attention_is_all_you_need_pytorch/transformer/Models.py", line 171, in forward
enc_output, *_ = self.encoder(src_seq, src_mask)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/attention_is_all_you_need_pytorch/transformer/Models.py", line 172, in <graph break in forward>
dec_output, *_ = self.decoder(trg_seq, trg_mask, enc_output, src_mask)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/attention_is_all_you_need_pytorch/transformer/Models.py", line 106, in forward
dec_output = self.dropout(self.position_enc(self.trg_word_emb(trg_seq)))
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/attention_is_all_you_need_pytorch/transformer/Models.py", line 106, in <graph break in forward>
dec_output = self.dropout(self.position_enc(self.trg_word_emb(trg_seq)))
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 516, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 351, in fw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 122, in compile_fx_inner
compiled_fn = graph.compile_to_fn()
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 349, in compile_to_fn
return self.compile_to_module().call
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 335, in compile_to_module
code = self.codegen()
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 326, in codegen
self.wrapper_code = WrapperCodeGen()
File "/scratch/ezyang/work/pytorch/torch/_inductor/codegen/wrapper.py", line 240, in __init__
V.graph.sizevars.codegen(self.prefix, V.graph.graph_inputs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/sizevars.py", line 481, in codegen
assert not needed
AssertionError
TorchDynamo optimized model failed to run because of following error
cuda train attention_is_all_you_need_pytorch FAIL
Running torchbench.py dcgan...
cuda train dcgan PASS
Running torchbench.py densenet121...
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchvision/torchvision/models/densenet.py", line 213, in forward
def forward(self, x: Tensor) -> Tensor:
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 945, in new_func
return compiled_fn(args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 321, in g
return f(*args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 573, in compiled_function
return CompiledFunction.apply(*remove_dupe_args(args))
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 528, in forward
fw_outs = call_func_with_args(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 346, in call_func_with_args
out = normalize_as_list(f(args))
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 185, in run
return model(new_inputs)
File "/tmp/torchinductor_ezyang/xt/cxtlzawk5soh56qeaeeyglydxl7bo3iyqfn2zd4imra55rvwtof4.py", line 5748, in call
return (buf895, primals_1, primals_2, primals_4, primals_6, primals_7, primals_9, primals_10, primals_12, primals_13, primals_15, primals_16, primals_18, primals_19, primals_21, primals_22, primals_24, primals_25, primals_27, primals_28, primals_30, primals_31, primals_33, primals_34, primals_36, primals_37, primals_39, primals_40, primals_42, primals_43, primals_45, primals_46, primals_48, primals_49, primals_51, primals_52, primals_54, primals_55, primals_57, primals_58, primals_60, primals_61, primals_63, primals_64, primals_66, primals_67, primals_69, primals_70, primals_72, primals_73, primals_75, primals_76, primals_78, primals_79, primals_81, primals_82, primals_84, primals_85, primals_87, primals_88, primals_90, primals_91, primals_93, primals_94, primals_96, primals_97, primals_99, primals_100, primals_102, primals_103, primals_105, primals_106, primals_108, primals_109, primals_111, primals_112, primals_114, primals_115, primals_117, primals_118, primals_120, primals_121, primals_123, primals_124, primals_126, primals_127, primals_129, primals_130, primals_132, primals_133, primals_135, primals_136, primals_138, primals_139, primals_141, primals_142, primals_144, primals_145, primals_147, primals_148, primals_150, primals_151, primals_153, primals_154, primals_156, primals_157, primals_159, primals_160, primals_162, primals_163, primals_165, primals_166, primals_168, primals_169, primals_171, primals_172, primals_174, primals_175, primals_177, primals_178, primals_180, primals_181, primals_183, primals_184, primals_186, primals_187, primals_189, primals_190, primals_192, primals_193, primals_195, primals_196, primals_198, primals_199, primals_201, primals_202, primals_204, primals_205, primals_207, primals_208, primals_210, primals_211, primals_213, primals_214, primals_216, primals_217, primals_219, primals_220, primals_222, primals_223, primals_225, primals_226, primals_228, primals_229, primals_231, primals_232, primals_234, primals_235, primals_237, primals_238, primals_240, primals_241, primals_243, primals_244, primals_246, primals_247, primals_249, primals_250, primals_252, primals_253, primals_255, primals_256, primals_258, primals_259, primals_261, primals_262, primals_264, primals_265, primals_267, primals_268, primals_270, primals_271, primals_273, primals_274, primals_276, primals_277, primals_279, primals_280, primals_282, primals_283, primals_285, primals_286, primals_288, primals_289, primals_291, primals_292, primals_294, primals_295, primals_297, primals_298, primals_300, primals_301, primals_303, primals_304, primals_306, primals_307, primals_309, primals_310, primals_312, primals_313, primals_315, primals_316, primals_318, primals_319, primals_321, primals_322, primals_324, primals_325, primals_327, primals_328, primals_330, primals_331, primals_333, primals_334, primals_336, primals_337, primals_339, primals_340, primals_342, primals_343, primals_345, primals_346, primals_348, primals_349, primals_351, primals_352, primals_354, primals_355, primals_357, primals_358, primals_360, primals_361, primals_365, primals_366, primals_368, primals_369, primals_371, primals_372, primals_374, primals_375, primals_377, primals_378, primals_380, primals_381, primals_383, primals_384, primals_386, primals_387, primals_389, primals_390, primals_392, primals_393, primals_395, primals_396, primals_398, primals_399, primals_401, primals_402, primals_404, primals_405, primals_407, primals_408, primals_410, primals_411, primals_413, primals_414, primals_416, primals_417, primals_419, primals_420, primals_422, primals_423, primals_425, primals_426, primals_428, primals_429, primals_431, primals_432, primals_434, primals_435, primals_437, primals_438, primals_440, primals_441, primals_443, primals_444, primals_446, primals_447, primals_449, primals_450, primals_452, primals_453, primals_455, primals_456, primals_458, primals_459, primals_461, primals_462, primals_464, primals_465, primals_467, primals_468, primals_470, primals_471, primals_473, primals_474, primals_476, primals_477, primals_479, primals_480, primals_482, primals_483, primals_485, primals_486, primals_488, primals_489, primals_491, primals_492, primals_494, primals_495, primals_497, primals_498, primals_500, primals_501, primals_503, primals_504, primals_506, primals_507, primals_509, primals_510, primals_512, primals_513, primals_515, primals_516, primals_518, primals_519, primals_521, primals_522, primals_524, primals_525, primals_527, primals_528, primals_530, primals_531, primals_533, primals_534, primals_536, primals_537, primals_539, primals_540, primals_542, primals_543, primals_545, primals_546, primals_548, primals_549, primals_551, primals_552, primals_554, primals_555, primals_557, primals_558, primals_560, primals_561, primals_563, primals_564, primals_566, primals_567, primals_569, primals_570, primals_572, primals_573, primals_575, primals_576, primals_578, primals_579, primals_581, primals_582, primals_584, primals_585, primals_587, primals_588, primals_590, primals_591, primals_593, primals_594, primals_596, primals_597, primals_599, primals_600, primals_602, primals_603, primals_605, primals_606, primals_608, primals_609, primals_611, primals_612, primals_614, primals_615, primals_617, primals_618, primals_620, primals_621, primals_623, primals_624, primals_626, primals_627, primals_629, primals_630, primals_632, primals_633, primals_635, primals_636, primals_638, primals_639, primals_641, primals_642, primals_644, primals_645, primals_647, primals_648, primals_650, primals_651, primals_653, primals_654, primals_656, primals_657, primals_659, primals_660, primals_662, primals_663, primals_665, primals_666, primals_668, primals_669, primals_671, primals_672, primals_674, primals_675, primals_677, primals_678, primals_680, primals_681, primals_683, primals_684, primals_686, primals_687, primals_689, primals_690, primals_692, primals_693, primals_695, primals_696, primals_698, primals_699, primals_701, primals_702, primals_704, primals_705, primals_707, primals_708, primals_710, primals_711, primals_713, primals_714, primals_716, primals_717, primals_719, primals_720, primals_722, primals_723, primals_725, primals_726, primals_728, buf0, buf1, buf3, buf2, buf4, buf5, buf6, buf9, buf10, buf11, buf12, buf17, buf18, buf19, buf20, buf26, buf27, buf28, buf29, buf36, buf37, buf38, buf39, buf47, buf48, buf49, buf50, buf59, buf60, buf61, buf62, buf63, buf64, buf65, buf68, buf69, buf70, buf71, buf76, buf77, buf78, buf79, buf85, buf86, buf87, buf88, buf95, buf96, buf97, buf98, buf106, buf107, buf108, buf109, buf118, buf119, buf120, buf121, buf131, buf132, buf133, buf134, buf145, buf146, buf147, buf148, buf160, buf161, buf162, buf163, buf176, buf177, buf178, buf179, buf193, buf194, buf195, buf196, buf211, buf212, buf213, buf214, buf215, buf216, buf217, buf220, buf221, buf222, buf223, buf228, buf229, buf230, buf231, buf237, buf238, buf239, buf240, buf247, buf248, buf249, buf250, buf258, buf259, buf260, buf261, buf270, buf271, buf272, buf273, buf283, buf284, buf285, buf286, buf297, buf298, buf299, buf300, buf312, buf313, buf314, buf315, buf328, buf329, buf330, buf331, buf345, buf346, buf347, buf348, buf363, buf364, buf365, buf366, buf382, buf383, buf384, buf385, buf402, buf403, buf404, buf405, buf423, buf424, buf425, buf426, buf445, buf446, buf447, buf448, buf468, buf469, buf470, buf471, buf492, buf493, buf494, buf495, buf517, buf518, buf519, buf520, buf543, buf544, buf545, buf546, buf570, buf571, buf572, buf573, buf598, buf599, buf600, buf601, buf627, buf628, buf629, buf630, buf657, buf658, buf659, buf660, buf661, buf662, buf663, buf666, buf667, buf668, buf669, buf674, buf675, buf676, buf677, buf683, buf684, buf685, buf686, buf693, buf694, buf695, buf696, buf704, buf705, buf706, buf707, buf716, buf717, buf718, buf719, buf729, buf730, buf731, buf732, buf743, buf744, buf745, buf746, buf758, buf759, buf760, buf761, buf774, buf775, buf776, buf777, buf791, buf792, buf793, buf794, buf809, buf810, buf811, buf812, buf828, buf829, buf830, buf831, buf848, buf849, buf850, buf851, buf869, buf870, buf871, buf872, buf891, as_strided(buf894, (2, 1024), (1024, 1)), as_strided(primals_363, (1000, 1024), (1024, 1)), buf896, s0, 7, 7, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
cuda train densenet121 FAIL
WARNING:root:detectron2_fcos_r_50_fpn failed to load
FCOS train is not supported by upstream detectron2. See GH Issue: https://github.com/facebookresearch/detectron2/issues/4369.
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1877, in run
device, name, model, example_inputs, batch_size = runner.load_model(
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 251, in load_model
benchmark = benchmark_cls(
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/util/model.py", line 18, in __call__
obj = type.__call__(cls, *args, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/detectron2_fcos_r_50_fpn/__init__.py", line 15, in __init__
super().__init__(variant="COCO-Detection/fcos_R_50_FPN_1x.py", test=test, device=device,
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/util/framework/detectron2/model_factory.py", line 100, in __init__
loader = self.setup_train(cfg, args)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/util/framework/detectron2/model_factory.py", line 110, in setup_train
raise NotImplementedError("FCOS train is not supported by upstream detectron2. " \
NotImplementedError: FCOS train is not supported by upstream detectron2. See GH Issue: https://github.com/facebookresearch/detectron2/issues/4369.
WARNING:root:detectron2_maskrcnn_r_50_c4 failed to load
Eager model failed to run
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 978, in validate_model
self.model_iter_fn(model, example_inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 336, in forward_and_backward_pass
loss = self.compute_loss(pred)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 326, in compute_loss
return reduce_to_scalar_loss(pred)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/testing.py", line 87, in reduce_to_scalar_loss
return sum([reduce_to_scalar_loss(x) for x in out]) / len(out)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/testing.py", line 87, in <listcomp>
return sum([reduce_to_scalar_loss(x) for x in out]) / len(out)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/testing.py", line 97, in reduce_to_scalar_loss
return sum([reduce_to_scalar_loss(value) for value in out.values()]) / len(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/testing.py", line 97, in <listcomp>
return sum([reduce_to_scalar_loss(value) for value in out.values()]) / len(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/testing.py", line 102, in reduce_to_scalar_loss
raise NotImplementedError("Don't know how to reduce", type(out))
NotImplementedError: ("Don't know how to reduce", <class 'detectron2.structures.instances.Instances'>)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1877, in run
device, name, model, example_inputs, batch_size = runner.load_model(
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 282, in load_model
self.validate_model(model, example_inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 980, in validate_model
raise NotImplementedError("Eager model failed to run")
NotImplementedError: Eager model failed to run
Running torchbench.py dlrm...
[2022-11-06 02:55:50,221] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
[2022-11-06 02:55:50,224] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
[2022-11-06 02:55:50,228] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
[2022-11-06 02:55:50,231] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
[2022-11-06 02:55:50,235] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
[2022-11-06 02:55:50,238] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
[2022-11-06 02:55:50,241] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
[2022-11-06 02:55:50,245] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._embedding_bag
ERROR:common:'SymInt' object cannot be interpreted as an integer
While executing %range_1 : [#users=1] = call_function[target=builtins.range](args = (%getitem_12,), kwargs = {})
Original traceback:
Module stack: {}
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/dlrm/dlrm_s_pytorch.py", line 319, in interact_features
li = torch.tensor([i for i in range(ni) for j in range(i + offset)], device=x.device)
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/dlrm/dlrm_s_pytorch.py", line 338, in forward
return self.sequential_forward(dense_x, lS_o, lS_i)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/dlrm/dlrm_s_pytorch.py", line 355, in sequential_forward
z = self.interact_features(x, ly)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/dlrm/dlrm_s_pytorch.py", line 302, in interact_features
def interact_features(self, x, ly):
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 462, in aot_dispatch_autograd
out = flat_fn(*flat_args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 901, in functional_call
out = Interpreter(mod).run(*args[params_len:], **kwargs)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 243, in call_function
return target(*args, **kwargs)
TypeError: 'SymInt' object cannot be interpreted as an integer
While executing %range_1 : [#users=1] = call_function[target=builtins.range](args = (%getitem_12,), kwargs = {})
Original traceback:
Module stack: {}
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/dlrm/dlrm_s_pytorch.py", line 319, in interact_features
li = torch.tensor([i for i in range(ni) for j in range(i + offset)], device=x.device)
TorchDynamo optimized model failed to run because of following error
cuda train dlrm FAIL
/scratch/ezyang/work/pytorch/torch/utils/tensorboard/__init__.py:4: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
if not hasattr(tensorboard, "__version__") or LooseVersion(
/scratch/ezyang/work/env/lib/python3.9/site-packages/gym/core.py:317: DeprecationWarning: WARN: Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.
deprecation(
Running torchbench.py drq...
cuda train drq FAIL (TIMEOUT)
Running torchbench.py fastNLP_Bert...
[2022-11-06 03:05:59,955] torch._inductor.ir: [WARNING] DeviceCopy
ERROR:common:RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number
aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
target: aten.arange.default
args[0]: s1
kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}
While executing %arange : [#users=1] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_embeddings': <class 'fastNLP.modules.encoder.bert.BertEmbeddings'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/fastNLP/modules/encoder/bert.py", line 230, in forward
position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/fastNLP/modules/encoder/bert.py", line 512, in forward
embedding_output = self.embeddings(input_ids, token_type_ids)
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 239, in call_function
out = lowerings[target](*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 211, in wrapped
return decomp_fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 1269, in arange
return fallback_arange(
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 968, in handler
result = ir.FallbackKernel.create(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2918, in create
) = cls.process_kernel(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2297, in process_kernel
example_output = kernel(*new_args, **new_kwargs)
File "/scratch/ezyang/work/pytorch/torch/_ops.py", line 445, in __call__
return self._op(*args, **kwargs or {})
RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number
aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/fastNLP/models/bert.py", line 265, in forward
sequence_output = self.bert(words)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/fastNLP/embeddings/bert_embedding.py", line 137, in forward
outputs = self.model(words)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/fastNLP/embeddings/bert_embedding.py", line 445, in forward
max_word_piece_length = batch_word_pieces_length.sum(dim=-1).max().item() # 表示word piece的长度(包括padding)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/fastNLP/embeddings/bert_embedding.py", line 462, in <graph break in forward>
word_indexes = words.cpu().numpy()
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/fastNLP/embeddings/bert_embedding.py", line 482, in <graph break in forward>
bert_outputs, pooled_cls = self.encoder(word_pieces, token_type_ids=token_type_ids, attention_mask=attn_masks,
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/fastNLP/modules/encoder/bert.py", line 480, in forward
def forward(self, input_ids, token_type_ids=None, attention_mask=None, output_all_encoded_layers=True):
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 516, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 351, in fw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 242, in call_function
raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number
aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
target: aten.arange.default
args[0]: s1
kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}
While executing %arange : [#users=1] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_embeddings': <class 'fastNLP.modules.encoder.bert.BertEmbeddings'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/fastNLP/modules/encoder/bert.py", line 230, in forward
position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/fastNLP/modules/encoder/bert.py", line 512, in forward
embedding_output = self.embeddings(input_ids, token_type_ids)
TorchDynamo optimized model failed to run because of following error
cuda train fastNLP_Bert FAIL
Running torchbench.py functorch_dp_cifar10...
cuda train functorch_dp_cifar10 FAIL (TIMEOUT)
Running torchbench.py functorch_maml_omniglot...
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/container.py", line 202, in forward
def forward(self, input):
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 945, in new_func
return compiled_fn(args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 321, in g
return f(*args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 573, in compiled_function
return CompiledFunction.apply(*remove_dupe_args(args))
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 528, in forward
fw_outs = call_func_with_args(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 346, in call_func_with_args
out = normalize_as_list(f(args))
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 185, in run
return model(new_inputs)
File "/tmp/torchinductor_ezyang/sh/cshw6h3my7hq3jqubhwesz632iajmgvsfeb2eapylwd5lij5nhlr.py", line 555, in call
return (buf21, primals_1, primals_3, primals_5, primals_7, primals_9, primals_11, primals_15, buf1, as_strided(buf22, (64, ), (1, )), buf5, buf6, buf23, buf8, as_strided(buf24, (64, ), (1, )), buf12, buf13, buf25, buf15, as_strided(buf26, (64, ), (1, )), buf19, buf27, as_strided(buf20, (5, 64), (64, 1)), as_strided(primals_13, (5, 64), (64, 1)), as_strided(buf28, (1, 64, 1, 1), (0, 1, 0, 0)), as_strided(buf29, (1, 64, 1, 1), (0, 1, 0, 0)), as_strided(buf30, (1, 64, 1, 1), (0, 1, 0, 0)), s0, 1, 1, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
cuda train functorch_maml_omniglot FAIL
Running torchbench.py hf_Albert...
ERROR:common:'int' object has no attribute 'size'
While executing %sym_size : [#users=2] = placeholder[target=sym_size]
Original traceback:
Module stack: {'self_predictions': <class 'transformers.models.albert.modeling_albert.AlbertMLMHead'>, 'self_predictions_dense': <class 'torch.nn.modules.linear.Linear'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/albert/modeling_albert.py", line 883, in forward
hidden_states = self.dense(hidden_states)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/albert/modeling_albert.py", line 1003, in <graph break in forward>
prediction_scores = self.predictions(sequence_outputs)
Gradient addition node due to multiple use of tensor around:
Module stack: {'self_embedding_hidden_mapping_in': <class 'torch.nn.modules.linear.Linear'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/albert/modeling_albert.py", line 470, in forward
hidden_states = self.embedding_hidden_mapping_in(hidden_states)
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 337, in <graph break in forward_and_backward_pass>
self.grad_scaler.scale(loss).backward()
File "/scratch/ezyang/work/pytorch/torch/_tensor.py", line 450, in backward
torch.autograd.backward(
File "/scratch/ezyang/work/pytorch/torch/autograd/__init__.py", line 197, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
File "/scratch/ezyang/work/pytorch/torch/autograd/function.py", line 270, in apply
return user_fn(self, *args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 558, in backward
CompiledFunction.compiled_bw = aot_config.bw_compiler(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/optimizations/backends.py", line 555, in _wrapped_bw_compiler
return disable(disable(bw_compiler)(*args, **kwargs))
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 362, in bw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 199, in placeholder
sizes, strides = self.static_sizes_strides(example)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 53, in static_sizes_strides
size = [sympy.Integer(i) for i in ex.size()]
AttributeError: 'int' object has no attribute 'size'
While executing %sym_size : [#users=2] = placeholder[target=sym_size]
Original traceback:
Module stack: {'self_predictions': <class 'transformers.models.albert.modeling_albert.AlbertMLMHead'>, 'self_predictions_dense': <class 'torch.nn.modules.linear.Linear'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/albert/modeling_albert.py", line 883, in forward
hidden_states = self.dense(hidden_states)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/albert/modeling_albert.py", line 1003, in <graph break in forward>
prediction_scores = self.predictions(sequence_outputs)
Gradient addition node due to multiple use of tensor around:
Module stack: {'self_embedding_hidden_mapping_in': <class 'torch.nn.modules.linear.Linear'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/albert/modeling_albert.py", line 470, in forward
hidden_states = self.embedding_hidden_mapping_in(hidden_states)
TorchDynamo optimized model failed to run because of following error
cuda train hf_Albert FAIL
Running torchbench.py hf_Bart...
ERROR:common:RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number
aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
target: aten.arange.start
args[0]: 0
args[1]: s1
kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}
While executing %arange : [#users=1] = call_function[target=torch.ops.aten.arange.start](args = (0, %add), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_embed_positions': <class 'transformers.models.bart.modeling_bart.BartLearnedPositionalEmbedding'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 134, in forward
positions = torch.arange(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 801, in forward
embed_pos = self.embed_positions(input_shape)
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 239, in call_function
out = lowerings[target](*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 211, in wrapped
return decomp_fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 1269, in arange
return fallback_arange(
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 968, in handler
result = ir.FallbackKernel.create(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2918, in create
) = cls.process_kernel(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2297, in process_kernel
example_output = kernel(*new_args, **new_kwargs)
File "/scratch/ezyang/work/pytorch/torch/_ops.py", line 445, in __call__
return self._op(*args, **kwargs or {})
RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number
aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 41, in forward
return self.model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 1353, in forward
outputs = self.model(
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 1222, in forward
encoder_outputs = self.encoder(
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 735, in forward
def forward(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 516, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 351, in fw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 242, in call_function
raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number
aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
target: aten.arange.start
args[0]: 0
args[1]: s1
kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}
While executing %arange : [#users=1] = call_function[target=torch.ops.aten.arange.start](args = (0, %add), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_embed_positions': <class 'transformers.models.bart.modeling_bart.BartLearnedPositionalEmbedding'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 134, in forward
positions = torch.arange(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bart/modeling_bart.py", line 801, in forward
embed_pos = self.embed_positions(input_shape)
TorchDynamo optimized model failed to run because of following error
cuda train hf_Bart FAIL
Running torchbench.py hf_Bert...
ERROR:common:'int' object has no attribute 'size'
While executing %sym_size : [#users=2] = placeholder[target=sym_size]
Original traceback:
Module stack: {'self_cls': <class 'transformers.models.bert.modeling_bert.BertOnlyMLMHead'>, 'self_cls_predictions': <class 'transformers.models.bert.modeling_bert.BertLMPredictionHead'>, 'self_cls_predictions_transform': <class 'transformers.models.bert.modeling_bert.BertPredictionHeadTransform'>, 'self_cls_predictions_transform_dense': <class 'torch.nn.modules.linear.Linear'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 675, in forward
hidden_states = self.dense(hidden_states)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 696, in forward
hidden_states = self.transform(hidden_states)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 707, in forward
prediction_scores = self.predictions(sequence_output)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1366, in <graph break in forward>
prediction_scores = self.cls(sequence_output)
Gradient addition node due to multiple use of tensor around:
Module stack: {'self_layer_0': <class 'transformers.models.bert.modeling_bert.BertLayer'>, 'self_layer_0_attention': <class 'transformers.models.bert.modeling_bert.BertAttention'>, 'self_layer_0_attention_self': <class 'transformers.models.bert.modeling_bert.BertSelfAttention'>, 'self_layer_0_attention_self_query': <class 'torch.nn.modules.linear.Linear'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 289, in forward
mixed_query_layer = self.query(hidden_states)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 423, in forward
self_outputs = self.self(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 493, in forward
self_attention_outputs = self.attention(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward
layer_outputs = layer_module(
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 337, in <graph break in forward_and_backward_pass>
self.grad_scaler.scale(loss).backward()
File "/scratch/ezyang/work/pytorch/torch/_tensor.py", line 450, in backward
torch.autograd.backward(
File "/scratch/ezyang/work/pytorch/torch/autograd/__init__.py", line 197, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
File "/scratch/ezyang/work/pytorch/torch/autograd/function.py", line 270, in apply
return user_fn(self, *args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 558, in backward
CompiledFunction.compiled_bw = aot_config.bw_compiler(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/optimizations/backends.py", line 555, in _wrapped_bw_compiler
return disable(disable(bw_compiler)(*args, **kwargs))
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 362, in bw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 199, in placeholder
sizes, strides = self.static_sizes_strides(example)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 53, in static_sizes_strides
size = [sympy.Integer(i) for i in ex.size()]
AttributeError: 'int' object has no attribute 'size'
While executing %sym_size : [#users=2] = placeholder[target=sym_size]
Original traceback:
Module stack: {'self_cls': <class 'transformers.models.bert.modeling_bert.BertOnlyMLMHead'>, 'self_cls_predictions': <class 'transformers.models.bert.modeling_bert.BertLMPredictionHead'>, 'self_cls_predictions_transform': <class 'transformers.models.bert.modeling_bert.BertPredictionHeadTransform'>, 'self_cls_predictions_transform_dense': <class 'torch.nn.modules.linear.Linear'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 675, in forward
hidden_states = self.dense(hidden_states)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 696, in forward
hidden_states = self.transform(hidden_states)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 707, in forward
prediction_scores = self.predictions(sequence_output)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 1366, in <graph break in forward>
prediction_scores = self.cls(sequence_output)
Gradient addition node due to multiple use of tensor around:
Module stack: {'self_layer_0': <class 'transformers.models.bert.modeling_bert.BertLayer'>, 'self_layer_0_attention': <class 'transformers.models.bert.modeling_bert.BertAttention'>, 'self_layer_0_attention_self': <class 'transformers.models.bert.modeling_bert.BertSelfAttention'>, 'self_layer_0_attention_self_query': <class 'torch.nn.modules.linear.Linear'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 289, in forward
mixed_query_layer = self.query(hidden_states)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 423, in forward
self_outputs = self.self(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 493, in forward
self_attention_outputs = self.attention(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/bert/modeling_bert.py", line 607, in forward
layer_outputs = layer_module(
TorchDynamo optimized model failed to run because of following error
cuda train hf_Bert FAIL
Running torchbench.py hf_BigBird...
[2022-11-06 03:18:23,053] torch._inductor.graph: [WARNING] Creating implicit fallback for:
target: <built-in function mod>
args[0]: s1
args[1]: 64
ERROR:common:TypeError: mod expected 2 arguments, got 0
target: <built-in function mod>
args[0]: s1
args[1]: 64
While executing %mod : [#users=1] = call_function[target=operator.mod](args = (%sym_size, 64), kwargs = {})
Original traceback:
None
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 239, in call_function
out = lowerings[target](*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 211, in wrapped
return decomp_fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 968, in handler
result = ir.FallbackKernel.create(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2918, in create
) = cls.process_kernel(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2297, in process_kernel
example_output = kernel(*new_args, **new_kwargs)
TypeError: mod expected 2 arguments, got 0
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/big_bird/modeling_big_bird.py", line 2462, in forward
outputs = self.bert(
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/big_bird/modeling_big_bird.py", line 2092, in forward
) = self._pad_to_block_size(
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/big_bird/modeling_big_bird.py", line 2224, in _pad_to_block_size
def _pad_to_block_size(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 663, in create_aot_dispatcher_function
return aot_dispatch_base(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 386, in aot_dispatch_base
compiled_fw = aot_config.fw_compiler(fw_module, flat_args)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 351, in fw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 242, in call_function
raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: TypeError: mod expected 2 arguments, got 0
target: <built-in function mod>
args[0]: s1
args[1]: 64
While executing %mod : [#users=1] = call_function[target=operator.mod](args = (%sym_size, 64), kwargs = {})
Original traceback:
None
TorchDynamo optimized model failed to run because of following error
cuda train hf_BigBird FAIL
Running torchbench.py hf_DistilBert...
ERROR:common:'int' object has no attribute 'size'
While executing %sym_size : [#users=2] = placeholder[target=sym_size]
Original traceback:
Module stack: {'self_vocab_transform': <class 'torch.nn.modules.linear.Linear'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/distilbert/modeling_distilbert.py", line 659, in <graph break in forward>
prediction_logits = self.vocab_transform(hidden_states) # (bs, seq_length, dim)
Gradient addition node due to multiple use of tensor around:
Module stack: {'self_transformer': <class 'transformers.models.distilbert.modeling_distilbert.Transformer'>, 'self_transformer_layer_0': <class 'transformers.models.distilbert.modeling_distilbert.TransformerBlock'>, 'self_transformer_layer_0_attention': <class 'transformers.models.distilbert.modeling_distilbert.MultiHeadSelfAttention'>, 'self_transformer_layer_0_attention_q_lin': <class 'torch.nn.modules.linear.Linear'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/distilbert/modeling_distilbert.py", line 207, in forward
q = shape(self.q_lin(query)) # (bs, n_heads, q_length, dim_per_head)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/distilbert/modeling_distilbert.py", line 283, in forward
sa_output = self.attention(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/distilbert/modeling_distilbert.py", line 345, in forward
layer_outputs = layer_module(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/distilbert/modeling_distilbert.py", line 567, in <graph break in forward>
return self.transformer(
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 337, in <graph break in forward_and_backward_pass>
self.grad_scaler.scale(loss).backward()
File "/scratch/ezyang/work/pytorch/torch/_tensor.py", line 450, in backward
torch.autograd.backward(
File "/scratch/ezyang/work/pytorch/torch/autograd/__init__.py", line 197, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
File "/scratch/ezyang/work/pytorch/torch/autograd/function.py", line 270, in apply
return user_fn(self, *args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 558, in backward
CompiledFunction.compiled_bw = aot_config.bw_compiler(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/optimizations/backends.py", line 555, in _wrapped_bw_compiler
return disable(disable(bw_compiler)(*args, **kwargs))
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 362, in bw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 199, in placeholder
sizes, strides = self.static_sizes_strides(example)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 53, in static_sizes_strides
size = [sympy.Integer(i) for i in ex.size()]
AttributeError: 'int' object has no attribute 'size'
While executing %sym_size : [#users=2] = placeholder[target=sym_size]
Original traceback:
Module stack: {'self_vocab_transform': <class 'torch.nn.modules.linear.Linear'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/distilbert/modeling_distilbert.py", line 659, in <graph break in forward>
prediction_logits = self.vocab_transform(hidden_states) # (bs, seq_length, dim)
Gradient addition node due to multiple use of tensor around:
Module stack: {'self_transformer': <class 'transformers.models.distilbert.modeling_distilbert.Transformer'>, 'self_transformer_layer_0': <class 'transformers.models.distilbert.modeling_distilbert.TransformerBlock'>, 'self_transformer_layer_0_attention': <class 'transformers.models.distilbert.modeling_distilbert.MultiHeadSelfAttention'>, 'self_transformer_layer_0_attention_q_lin': <class 'torch.nn.modules.linear.Linear'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/distilbert/modeling_distilbert.py", line 207, in forward
q = shape(self.q_lin(query)) # (bs, n_heads, q_length, dim_per_head)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/distilbert/modeling_distilbert.py", line 283, in forward
sa_output = self.attention(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/distilbert/modeling_distilbert.py", line 345, in forward
layer_outputs = layer_module(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/distilbert/modeling_distilbert.py", line 567, in <graph break in forward>
return self.transformer(
TorchDynamo optimized model failed to run because of following error
cuda train hf_DistilBert FAIL
Running torchbench.py hf_GPT2...
ERROR:common:RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number
aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
target: aten.arange.start
args[0]: 0
args[1]: s1
kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}
While executing %arange : [#users=1] = call_function[target=torch.ops.aten.arange.start](args = (0, %add), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 793, in forward
position_ids = torch.arange(past_length, input_shape[-1] + past_length, dtype=torch.long, device=device)
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 239, in call_function
out = lowerings[target](*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 211, in wrapped
return decomp_fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 1269, in arange
return fallback_arange(
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 968, in handler
result = ir.FallbackKernel.create(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2918, in create
) = cls.process_kernel(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2297, in process_kernel
example_output = kernel(*new_args, **new_kwargs)
File "/scratch/ezyang/work/pytorch/torch/_ops.py", line 445, in __call__
return self._op(*args, **kwargs or {})
RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number
aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 1048, in forward
transformer_outputs = self.transformer(
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 738, in forward
@add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 516, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 351, in fw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 242, in call_function
raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number
aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
target: aten.arange.start
args[0]: 0
args[1]: s1
kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}
While executing %arange : [#users=1] = call_function[target=torch.ops.aten.arange.start](args = (0, %add), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/gpt2/modeling_gpt2.py", line 793, in forward
position_ids = torch.arange(past_length, input_shape[-1] + past_length, dtype=torch.long, device=device)
TorchDynamo optimized model failed to run because of following error
cuda train hf_GPT2 FAIL
Running torchbench.py hf_GPT2_large...
cuda train hf_GPT2_large PASS
Running torchbench.py hf_Longformer...
[2022-11-06 03:19:55,668] torch._inductor.ir: [WARNING] Using FallbackKernel: aten.cumsum
[2022-11-06 03:19:55,684] torch._inductor.graph: [WARNING] Creating implicit fallback for:
target: <built-in function mod>
args[0]: s1
args[1]: 512
ERROR:common:TypeError: mod expected 2 arguments, got 0
target: <built-in function mod>
args[0]: s1
args[1]: 512
While executing %mod : [#users=1] = call_function[target=operator.mod](args = (%sym_size_1, 512), kwargs = {})
Original traceback:
None
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 239, in call_function
out = lowerings[target](*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 211, in wrapped
return decomp_fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 968, in handler
result = ir.FallbackKernel.create(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2918, in create
) = cls.process_kernel(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2297, in process_kernel
example_output = kernel(*new_args, **new_kwargs)
TypeError: mod expected 2 arguments, got 0
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/longformer/modeling_longformer.py", line 1813, in forward
outputs = self.longformer(
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/longformer/modeling_longformer.py", line 1616, in forward
@add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 516, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 351, in fw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 242, in call_function
raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: TypeError: mod expected 2 arguments, got 0
target: <built-in function mod>
args[0]: s1
args[1]: 512
While executing %mod : [#users=1] = call_function[target=operator.mod](args = (%sym_size_1, 512), kwargs = {})
Original traceback:
None
TorchDynamo optimized model failed to run because of following error
cuda train hf_Longformer FAIL
Running torchbench.py hf_Reformer...
ERROR:common:Cannot call sizes() on tensor with symbolic sizes/strides
While executing %lowmem_dropout : [#users=1] = call_function[target=torch._inductor.overrides.lowmem_dropout](args = (%self_word_embeddings,), kwargs = {p: 0.05, training: True})
Original traceback:
None
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/reformer/modeling_reformer.py", line 2397, in forward
reformer_outputs = self.reformer(
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/reformer/modeling_reformer.py", line 2063, in forward
least_common_mult_chunk_length = _get_least_common_mult_chunk_len(self.config)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/reformer/modeling_reformer.py", line 2100, in <graph break in forward>
embedding_output = self.embeddings(
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/reformer/modeling_reformer.py", line 239, in forward
def forward(self, input_ids=None, position_ids=None, inputs_embeds=None, start_idx_pos_encodings=0):
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 462, in aot_dispatch_autograd
out = flat_fn(*flat_args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 901, in functional_call
out = Interpreter(mod).run(*args[params_len:], **kwargs)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 243, in call_function
return target(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/overrides.py", line 564, in lowmem_dropout
result = LowmemDropout.apply(input, p)
RuntimeError: Cannot call sizes() on tensor with symbolic sizes/strides
While executing %lowmem_dropout : [#users=1] = call_function[target=torch._inductor.overrides.lowmem_dropout](args = (%self_word_embeddings,), kwargs = {p: 0.05, training: True})
Original traceback:
None
TorchDynamo optimized model failed to run because of following error
cuda train hf_Reformer FAIL
Running torchbench.py hf_T5...
WARNING:common:fp64 golden ref were not generated for hf_T5
ERROR:common:RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number
aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
target: aten.arange.default
args[0]: s1
kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}
While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size_2,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_model': <class 'transformers.models.t5.modeling_t5.T5ForConditionalGeneration'>, 'self_model_encoder': <class 'transformers.models.t5.modeling_t5.T5Stack'>, 'self_model_encoder_block_0': <class 'transformers.models.t5.modeling_t5.T5Block'>, 'sub0_0': <class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>, 'self_model_encoder_block_0_layer_0_SelfAttention': <class 'transformers.models.t5.modeling_t5.T5Attention'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
attention_output = self.SelfAttention(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
self_attention_outputs = self.layer[0](
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
layer_outputs = layer_module(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
encoder_outputs = self.encoder(
| File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 41, in forward
return self.model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 239, in call_function
out = lowerings[target](*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 211, in wrapped
return decomp_fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 1269, in arange
return fallback_arange(
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 968, in handler
result = ir.FallbackKernel.create(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2918, in create
) = cls.process_kernel(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2297, in process_kernel
example_output = kernel(*new_args, **new_kwargs)
File "/scratch/ezyang/work/pytorch/torch/_ops.py", line 445, in __call__
return self._op(*args, **kwargs or {})
RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number
aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 40, in forward
def forward(self, input_ids, decoder_input_ids):
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 516, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 351, in fw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 242, in call_function
raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number
aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
target: aten.arange.default
args[0]: s1
kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}
While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size_2,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_model': <class 'transformers.models.t5.modeling_t5.T5ForConditionalGeneration'>, 'self_model_encoder': <class 'transformers.models.t5.modeling_t5.T5Stack'>, 'self_model_encoder_block_0': <class 'transformers.models.t5.modeling_t5.T5Block'>, 'sub0_0': <class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>, 'self_model_encoder_block_0_layer_0_SelfAttention': <class 'transformers.models.t5.modeling_t5.T5Attention'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
attention_output = self.SelfAttention(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
self_attention_outputs = self.layer[0](
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
layer_outputs = layer_module(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
encoder_outputs = self.encoder(
| File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 41, in forward
return self.model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
TorchDynamo optimized model failed to run because of following error
cuda train hf_T5 FAIL
Running torchbench.py hf_T5_base...
WARNING:common:fp64 golden ref were not generated for hf_T5_base
ERROR:common:RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number
aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
target: aten.arange.default
args[0]: s1
kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}
While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size_2,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_model': <class 'transformers.models.t5.modeling_t5.T5ForConditionalGeneration'>, 'self_model_encoder': <class 'transformers.models.t5.modeling_t5.T5Stack'>, 'self_model_encoder_block_0': <class 'transformers.models.t5.modeling_t5.T5Block'>, 'sub0_0': <class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>, 'self_model_encoder_block_0_layer_0_SelfAttention': <class 'transformers.models.t5.modeling_t5.T5Attention'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
attention_output = self.SelfAttention(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
self_attention_outputs = self.layer[0](
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
layer_outputs = layer_module(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
encoder_outputs = self.encoder(
| File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 41, in forward
return self.model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 239, in call_function
out = lowerings[target](*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 211, in wrapped
return decomp_fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 1269, in arange
return fallback_arange(
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 968, in handler
result = ir.FallbackKernel.create(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2918, in create
) = cls.process_kernel(kernel, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/ir.py", line 2297, in process_kernel
example_output = kernel(*new_args, **new_kwargs)
File "/scratch/ezyang/work/pytorch/torch/_ops.py", line 445, in __call__
return self._op(*args, **kwargs or {})
RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number
aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 40, in forward
def forward(self, input_ids, decoder_input_ids):
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 516, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 351, in fw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 242, in call_function
raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: RuntimeError: Overloaded torch operator invoked from Python failed to many any schema:
aten::arange() expected at most 5 argument(s) but received 7 argument(s). Declaration: aten::arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() expected at most 6 argument(s) but received 7 argument(s). Declaration: aten::arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
aten::arange() Expected a value of type 'number' for argument 'end' but instead found type 'Symbol'.
Position: 1
Value: s1
Declaration: aten::arange.start_step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
Cast error details: Cannot cast s1 to number
aten::arange() expected at most 4 argument(s) but received 7 argument(s). Declaration: aten::arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
aten::arange() expected at most 2 argument(s) but received 7 argument(s). Declaration: aten::arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
target: aten.arange.default
args[0]: s1
kwargs: {'dtype': torch.int64, 'device': device(type='cuda', index=0), 'pin_memory': False}
While executing %arange : [#users=2] = call_function[target=torch.ops.aten.arange.default](args = (%sym_size_2,), kwargs = {dtype: torch.int64, device: cuda:0, pin_memory: False})
Original traceback:
Module stack: {'self_model': <class 'transformers.models.t5.modeling_t5.T5ForConditionalGeneration'>, 'self_model_encoder': <class 'transformers.models.t5.modeling_t5.T5Stack'>, 'self_model_encoder_block_0': <class 'transformers.models.t5.modeling_t5.T5Block'>, 'sub0_0': <class 'transformers.models.t5.modeling_t5.T5LayerSelfAttention'>, 'self_model_encoder_block_0_layer_0_SelfAttention': <class 'transformers.models.t5.modeling_t5.T5Attention'>}
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 423, in compute_bias
context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 519, in forward
position_bias = self.compute_bias(real_seq_length, key_length, device=scores.device)
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 570, in forward
attention_output = self.SelfAttention(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 664, in forward
self_attention_outputs = self.layer[0](
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1033, in forward
layer_outputs = layer_module(
| File "/scratch/ezyang/work/env/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 1601, in forward
encoder_outputs = self.encoder(
| File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/util/framework/huggingface/model_factory.py", line 41, in forward
return self.model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
TorchDynamo optimized model failed to run because of following error
cuda train hf_T5_base FAIL
Running torchbench.py hf_T5_large...
cuda train hf_T5_large PASS
Running torchbench.py lennard_jones...
cuda train lennard_jones PASS
Running torchbench.py maml_omniglot...
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/container.py", line 202, in forward
def forward(self, input):
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 945, in new_func
return compiled_fn(args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 321, in g
return f(*args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 573, in compiled_function
return CompiledFunction.apply(*remove_dupe_args(args))
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 528, in forward
fw_outs = call_func_with_args(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 346, in call_func_with_args
out = normalize_as_list(f(args))
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 185, in run
return model(new_inputs)
File "/tmp/torchinductor_ezyang/ns/cnsnwwqwtrtycz5yxyhoiofmimptlux6ptkutivlhorddcq2yppu.py", line 298, in call
return (buf12, primals_1, primals_3, primals_5, primals_7, primals_9, primals_11, primals_15, primals_16, primals_18, primals_19, primals_21, primals_22, primals_24, buf1, buf2, buf3, buf13, buf5, buf6, buf7, buf14, buf9, buf10, buf15, as_strided(buf11, (5, 64), (64, 1)), as_strided(primals_13, (5, 64), (64, 1)), s0, 1, 1, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
cuda train maml_omniglot FAIL
Running torchbench.py mnasnet1_0...
ERROR:common:AssertionError:
target: aten.div.Scalar
args[0]: TensorBox(
ReinterpretView(
StorageBox(
MatrixMultiply(
name=buf0,
layout=FixedLayout('cuda', torch.float32, size=[s0, 1280], stride=[1280, 1]),
inputs=[InputBuffer(name='tangents_1', layout=FixedLayout('cuda', torch.float32, size=[s0, s1], stride=[s1, 1])), InputBuffer(name='permute_1', layout=FixedLayout('cuda', torch.float32, size=[1000, 1280], stride=[1280, 1]))],
constant_args=(),
kwargs={},
output_view=None,
origins={mm}
)
),
FixedLayout('cuda', torch.float32, size=[2, 1280, 7, 7], stride=[1280, 1, 0, 0]),
origins={expand}
)
)
args[1]: 49
While executing %div : [#users=1] = call_function[target=torch.ops.aten.div.Scalar](args = (%expand, %mul_156), kwargs = {})
Original traceback:
Module stack: {}
File "/scratch/ezyang/work/torchvision/torchvision/models/mnasnet.py", line 161, in forward
x = x.mean([2, 3])
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 239, in call_function
out = lowerings[target](*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 211, in wrapped
return decomp_fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 3259, in div
dtype = get_promoted_dtype(
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 138, in get_promoted_dtype
inps = [construct_input(arg) for arg in args]
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 138, in <listcomp>
inps = [construct_input(arg) for arg in args]
File "/scratch/ezyang/work/pytorch/torch/_inductor/lowering.py", line 133, in construct_input
assert hasattr(inp, "get_dtype")
AssertionError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 337, in <graph break in forward_and_backward_pass>
self.grad_scaler.scale(loss).backward()
File "/scratch/ezyang/work/pytorch/torch/_tensor.py", line 450, in backward
torch.autograd.backward(
File "/scratch/ezyang/work/pytorch/torch/autograd/__init__.py", line 197, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
File "/scratch/ezyang/work/pytorch/torch/autograd/function.py", line 270, in apply
return user_fn(self, *args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 558, in backward
CompiledFunction.compiled_bw = aot_config.bw_compiler(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/optimizations/backends.py", line 555, in _wrapped_bw_compiler
return disable(disable(bw_compiler)(*args, **kwargs))
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 362, in bw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 242, in call_function
raise LoweringException(e, target, args, kwargs) from e
torch._inductor.exc.LoweringException: AssertionError:
target: aten.div.Scalar
args[0]: TensorBox(
ReinterpretView(
StorageBox(
MatrixMultiply(
name=buf0,
layout=FixedLayout('cuda', torch.float32, size=[s0, 1280], stride=[1280, 1]),
inputs=[InputBuffer(name='tangents_1', layout=FixedLayout('cuda', torch.float32, size=[s0, s1], stride=[s1, 1])), InputBuffer(name='permute_1', layout=FixedLayout('cuda', torch.float32, size=[1000, 1280], stride=[1280, 1]))],
constant_args=(),
kwargs={},
output_view=None,
origins={mm}
)
),
FixedLayout('cuda', torch.float32, size=[2, 1280, 7, 7], stride=[1280, 1, 0, 0]),
origins={expand}
)
)
args[1]: 49
While executing %div : [#users=1] = call_function[target=torch.ops.aten.div.Scalar](args = (%expand, %mul_156), kwargs = {})
Original traceback:
Module stack: {}
File "/scratch/ezyang/work/torchvision/torchvision/models/mnasnet.py", line 161, in forward
x = x.mean([2, 3])
TorchDynamo optimized model failed to run because of following error
cuda train mnasnet1_0 FAIL
Running torchbench.py mobilenet_v2...
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchvision/torchvision/models/mobilenetv2.py", line 173, in forward
def forward(self, x: Tensor) -> Tensor:
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 945, in new_func
return compiled_fn(args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 321, in g
return f(*args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 573, in compiled_function
return CompiledFunction.apply(*remove_dupe_args(args))
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 528, in forward
fw_outs = call_func_with_args(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 346, in call_func_with_args
out = normalize_as_list(f(args))
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 185, in run
return model(new_inputs)
File "/tmp/torchinductor_ezyang/5p/c5pmlevpgwzkkcfxelj7yp3dbehlfc2tqvewaclrxmmgmqmxn4xe.py", line 1292, in call
return (buf140, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_159, primals_160, primals_162, primals_163, primals_165, primals_166, primals_168, primals_169, primals_171, primals_172, primals_174, primals_175, primals_177, primals_178, primals_180, primals_181, primals_183, primals_184, primals_186, primals_187, primals_189, primals_190, primals_192, primals_193, primals_195, primals_196, primals_198, primals_199, primals_201, primals_202, primals_204, primals_205, primals_207, primals_208, primals_210, primals_211, primals_213, primals_214, primals_216, primals_217, primals_219, primals_220, primals_222, primals_223, primals_225, primals_226, primals_228, primals_229, primals_231, primals_232, primals_234, primals_235, primals_237, primals_238, primals_240, primals_241, primals_243, primals_244, primals_246, primals_247, primals_249, primals_250, primals_252, primals_253, primals_255, primals_256, primals_258, primals_259, primals_261, primals_262, primals_264, primals_265, primals_267, primals_268, primals_270, primals_271, primals_273, primals_274, primals_276, primals_277, primals_279, primals_280, primals_282, primals_283, primals_285, primals_286, primals_288, primals_289, primals_291, primals_292, primals_294, primals_295, primals_297, primals_298, primals_300, primals_301, primals_303, primals_304, primals_306, primals_307, primals_309, primals_310, primals_312, primals_313, primals_315, buf0, buf2, buf3, buf5, buf6, buf7, buf8, buf10, buf11, buf13, buf14, buf15, buf16, buf18, buf19, buf21, buf22, buf23, buf24, buf26, buf27, buf29, buf30, buf31, buf32, buf34, buf35, buf37, buf38, buf39, buf40, buf42, buf43, buf45, buf46, buf47, buf48, buf50, buf51, buf53, buf54, buf55, buf56, buf58, buf59, buf61, buf62, buf63, buf64, buf66, buf67, buf69, buf70, buf71, buf72, buf74, buf75, buf77, buf78, buf79, buf80, buf82, buf83, buf85, buf86, buf87, buf88, buf90, buf91, buf93, buf94, buf95, buf96, buf98, buf99, buf101, buf102, buf103, buf104, buf106, buf107, buf109, buf110, buf111, buf112, buf114, buf115, buf117, buf118, buf119, buf120, buf122, buf123, buf125, buf126, buf127, buf128, buf130, buf131, buf133, buf134, buf135, buf136, as_strided(buf139, (2, 1280), (1280, 1)), as_strided(primals_157, (1000, 1280), (1280, 1)), buf141, buf142, buf143, buf144, buf145, buf146, buf147, buf148, buf149, buf150, buf151, buf152, buf153, buf154, buf155, buf156, buf157, buf158, buf159, buf160, buf161, buf162, buf163, buf164, buf165, buf166, buf167, buf168, buf169, buf170, buf171, buf172, buf173, buf174, buf175, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
cuda train mobilenet_v2 FAIL
Running torchbench.py mobilenet_v2_quantized_qat...
WARNING:common:fp64 golden ref were not generated for mobilenet_v2_quantized_qat
[2022-11-06 03:25:31,839] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:31,849] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:31,868] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:31,876] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:31,884] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:31,899] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:31,907] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:31,914] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:31,931] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:31,938] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:31,955] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:31,962] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:31,969] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:31,986] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:31,993] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,001] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,019] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,027] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,043] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,050] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,057] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,072] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,080] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,088] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,102] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,108] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,116] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,131] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,139] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,146] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,163] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,170] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,177] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,193] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,200] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,217] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,224] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,231] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,246] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,253] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,261] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,276] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,282] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,289] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,304] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,311] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,319] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,333] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,341] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,348] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,363] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,369] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,376] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,391] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,398] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,406] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,423] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,430] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,437] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,454] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,461] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,477] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,485] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,492] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,507] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,515] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,522] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,537] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,543] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,550] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,565] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,572] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,580] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,595] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,602] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,609] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,624] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,630] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,637] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,652] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,659] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,666] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,682] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,689] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,696] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,711] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,717] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,724] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,739] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,746] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,754] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,768] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,776] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,783] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,799] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,806] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,822] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,829] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,836] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,851] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,859] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,866] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,881] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,887] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,895] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,910] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,917] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,924] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,939] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,947] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,954] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,969] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,975] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,982] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:32,997] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,004] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,012] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,028] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,036] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,043] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,059] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,067] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,083] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,090] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,097] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,112] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,120] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,128] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,142] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,148] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,156] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,171] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,178] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,185] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,200] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,207] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,215] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,229] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,235] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,243] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,258] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,265] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,272] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,287] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,295] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,302] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,318] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,325] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,342] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,349] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,356] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,361] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,366] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,370] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:25:33,375] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/fx/graph_module.py", line 660, in call_wrapped
return self._wrapped_call(self, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/fx/graph_module.py", line 279, in __call__
raise e
File "/scratch/ezyang/work/pytorch/torch/fx/graph_module.py", line 269, in __call__
return super(self.cls, obj).__call__(*args, **kwargs) # type: ignore[misc]
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "<eval_with_key>.8", line 4, in forward
def forward(self, x : torch.Tensor) -> torch.Tensor:
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 945, in new_func
return compiled_fn(args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 321, in g
return f(*args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 573, in compiled_function
return CompiledFunction.apply(*remove_dupe_args(args))
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 528, in forward
fw_outs = call_func_with_args(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 346, in call_func_with_args
out = normalize_as_list(f(args))
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 185, in run
return model(new_inputs)
File "/tmp/torchinductor_ezyang/b3/cb3wg7pypttjp3ij5x2fyoiimpfnqnqikmfmpcbpkyhe7rcmdlmw.py", line 4565, in call
return (buf1335, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_166, primals_167, primals_183, primals_184, primals_200, primals_201, primals_217, primals_218, primals_234, primals_235, primals_251, primals_252, primals_268, primals_269, primals_285, primals_286, primals_302, primals_303, primals_326, primals_327, primals_343, primals_344, primals_360, primals_361, primals_377, primals_378, primals_394, primals_395, primals_411, primals_412, primals_435, primals_436, primals_452, primals_453, primals_469, primals_470, primals_493, primals_494, primals_510, primals_511, primals_527, primals_528, primals_544, primals_545, primals_561, primals_562, primals_578, primals_579, primals_602, primals_603, primals_619, primals_620, primals_636, primals_637, primals_660, primals_661, primals_677, primals_678, primals_694, primals_695, primals_718, primals_719, primals_735, primals_736, primals_752, primals_753, primals_769, primals_770, primals_786, primals_787, primals_803, primals_804, primals_827, primals_828, primals_844, primals_845, primals_861, primals_862, primals_885, primals_886, primals_902, primals_903, primals_919, primals_920, primals_936, primals_937, primals_953, primals_954, primals_970, primals_971, primals_994, primals_995, primals_1011, primals_1012, primals_1028, primals_1029, primals_1052, primals_1053, primals_1069, primals_1070, primals_1086, primals_1087, primals_1103, primals_1104, buf1, buf9, buf10, buf16, buf20, buf27, buf28, buf35, buf36, buf42, buf46, buf53, buf54, buf61, buf62, buf68, buf71, buf72, buf79, buf80, buf86, buf90, buf97, buf98, buf105, buf106, buf112, buf116, buf123, buf124, buf131, buf132, buf138, buf141, buf142, buf149, buf150, buf156, buf160, buf167, buf168, buf175, buf176, buf182, buf186, buf193, buf194, buf201, buf202, buf208, buf212, buf219, buf220, buf227, buf228, buf234, buf238, buf245, buf246, buf253, buf254, buf260, buf264, buf271, buf272, buf279, buf280, buf286, buf289, buf290, buf297, buf298, buf304, buf308, buf315, buf316, buf323, buf324, buf330, buf334, buf341, buf342, buf349, buf350, buf356, buf360, buf367, buf368, buf375, buf376, buf382, buf386, buf393, buf394, buf401, buf402, buf408, buf412, buf419, buf420, buf427, buf428, buf434, buf438, buf445, buf446, buf453, buf454, buf460, buf464, buf471, buf472, buf479, buf480, buf486, buf490, buf497, buf498, buf505, buf506, buf512, buf515, buf516, buf523, buf524, buf530, buf534, buf541, buf542, buf549, buf550, buf556, buf560, buf567, buf568, buf575, buf576, buf582, buf586, buf593, buf594, buf601, buf602, buf608, buf612, buf619, buf620, buf627, buf628, buf634, buf638, buf645, buf646, buf653, buf654, buf660, buf664, buf671, buf672, buf679, buf680, buf686, buf690, buf697, buf698, buf705, buf706, buf712, buf716, buf723, buf724, buf731, buf732, buf738, buf742, buf749, buf750, buf757, buf758, buf764, buf768, buf775, buf776, buf783, buf784, buf790, buf794, buf801, buf802, buf809, buf810, buf816, buf819, buf820, buf827, buf828, buf834, buf838, buf845, buf846, buf853, buf854, buf860, buf864, buf871, buf872, buf879, buf880, buf886, buf890, buf897, buf898, buf905, buf906, buf912, buf916, buf923, buf924, buf931, buf932, buf938, buf942, buf949, buf950, buf957, buf958, buf964, buf968, buf975, buf976, buf983, buf984, buf990, buf994, buf1001, buf1002, buf1009, buf1010, buf1016, buf1020, buf1027, buf1028, buf1035, buf1036, buf1042, buf1045, buf1046, buf1053, buf1054, buf1060, buf1064, buf1071, buf1072, buf1079, buf1080, buf1086, buf1090, buf1097, buf1098, buf1105, buf1106, buf1112, buf1116, buf1123, buf1124, buf1131, buf1132, buf1138, buf1142, buf1149, buf1150, buf1157, buf1158, buf1164, buf1168, buf1175, buf1176, buf1183, buf1184, buf1190, buf1194, buf1201, buf1202, buf1209, buf1210, buf1216, buf1220, buf1227, buf1228, buf1235, buf1236, buf1242, buf1246, buf1253, buf1254, buf1261, buf1262, buf1268, buf1271, buf1272, buf1279, buf1280, buf1286, buf1290, buf1298, buf1307, buf1314, buf1320, buf1321, buf1328, buf1336, as_strided(buf1327, (1000, 1280), (1280, 1)), buf1341, buf1342, buf1343, buf1344, buf1345, buf1346, buf1347, buf1348, buf1349, buf1350, buf1351, buf1352, buf1353, buf1354, buf1355, buf1356, buf1357, buf1358, buf1359, buf1360, buf1361, buf1362, buf1363, buf1364, buf1365, buf1366, buf1367, buf1368, buf1369, buf1370, buf1371, buf1372, buf1373, buf1374, buf1375, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
cuda train mobilenet_v2_quantized_qat FAIL
Running torchbench.py mobilenet_v3_large...
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchvision/torchvision/models/mobilenetv3.py", line 219, in forward
def forward(self, x: Tensor) -> Tensor:
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 945, in new_func
return compiled_fn(args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 321, in g
return f(*args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 573, in compiled_function
return CompiledFunction.apply(*remove_dupe_args(args))
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 528, in forward
fw_outs = call_func_with_args(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 346, in call_func_with_args
out = normalize_as_list(f(args))
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 185, in run
return model(new_inputs)
File "/tmp/torchinductor_ezyang/6f/c6fidbx6g47kmntubx55f6jcmj7wta3vdpjsihaujcszsh6oa5sa.py", line 2306, in call
return (buf171, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_36, primals_38, primals_39, primals_41, primals_42, primals_44, primals_45, primals_47, primals_49, primals_51, primals_52, primals_54, primals_55, primals_57, primals_58, primals_60, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_111, primals_113, primals_114, primals_116, primals_117, primals_119, primals_120, primals_122, primals_124, primals_126, primals_127, primals_129, primals_130, primals_132, primals_133, primals_135, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_150, primals_152, primals_153, primals_155, primals_156, primals_158, primals_159, primals_161, primals_163, primals_165, primals_166, primals_168, primals_169, primals_175, primals_176, primals_178, primals_179, primals_181, primals_182, primals_184, primals_185, primals_187, primals_188, primals_190, primals_191, primals_193, primals_194, primals_196, primals_197, primals_199, primals_200, primals_202, primals_203, primals_205, primals_206, primals_208, primals_209, primals_211, primals_212, primals_214, primals_215, primals_217, primals_218, primals_220, primals_221, primals_223, primals_224, primals_226, primals_227, primals_229, primals_230, primals_232, primals_233, primals_235, primals_236, primals_238, primals_239, primals_241, primals_242, primals_244, primals_245, primals_247, primals_248, primals_250, primals_251, primals_253, primals_254, primals_256, primals_257, primals_259, primals_260, primals_262, primals_263, primals_265, primals_266, primals_268, primals_269, primals_271, primals_272, primals_274, primals_275, primals_277, primals_278, primals_280, primals_281, primals_283, primals_284, primals_286, primals_287, primals_289, primals_290, primals_292, primals_293, primals_295, primals_296, primals_298, primals_299, primals_301, primals_302, primals_304, primals_305, primals_307, primals_308, primals_310, primals_311, primals_313, buf0, buf172, buf2, buf3, buf4, buf5, buf6, buf7, buf8, buf9, buf10, buf11, buf12, buf13, buf14, buf15, buf16, buf17, buf18, buf19, buf20, buf21, buf22, buf24, buf26, buf28, buf30, buf31, buf32, buf33, buf34, buf35, buf37, buf39, buf41, buf43, buf44, buf45, buf46, buf47, buf48, buf50, buf52, buf54, buf56, buf57, buf58, buf173, buf60, buf61, buf174, buf63, buf64, buf65, buf66, buf175, buf68, buf69, buf176, buf71, buf72, buf73, buf74, buf177, buf76, buf77, buf178, buf79, buf80, buf81, buf82, buf179, buf84, buf85, buf180, buf87, buf88, buf89, buf90, buf181, buf92, buf93, buf182, buf95, buf97, buf99, buf101, buf103, buf104, buf105, buf183, buf107, buf108, buf184, buf110, buf112, buf114, buf116, buf118, buf119, buf120, buf185, buf122, buf123, buf186, buf125, buf127, buf129, buf131, buf133, buf134, buf135, buf187, buf137, buf138, buf188, buf140, buf142, buf144, buf146, buf148, buf149, buf150, buf189, buf152, buf153, buf190, buf155, buf157, buf159, buf161, buf163, buf164, buf165, buf191, as_strided(buf168, (2, 960), (960, 1)), buf169, buf170, as_strided(primals_173, (1000, 1280), (1280, 1)), as_strided(primals_171, (1280, 960), (960, 1)), buf192, buf193, buf194, buf195, buf196, buf197, buf198, buf199, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
cuda train mobilenet_v3_large FAIL
Running torchbench.py moco...
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/nn/parallel/distributed.py", line 1093, in forward
output = self._run_ddp_forward(*inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/nn/parallel/distributed.py", line 1047, in _run_ddp_forward
return module_to_run(*inputs[0], **kwargs[0])
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/moco/moco/builder.py", line 115, in forward
def forward(self, im_q, im_k):
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 945, in new_func
return compiled_fn(args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 321, in g
return f(*args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 573, in compiled_function
return CompiledFunction.apply(*remove_dupe_args(args))
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 528, in forward
fw_outs = call_func_with_args(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 346, in call_func_with_args
out = normalize_as_list(f(args))
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 185, in run
return model(new_inputs)
File "/tmp/torchinductor_ezyang/e7/ce7lflz22usirdvhj27dv7435ibeuq6e57dw2aq6sdrgpdvevizi.py", line 1199, in call
return (buf113, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_157, primals_158, primals_162, primals_163, primals_165, primals_166, primals_168, primals_169, primals_171, primals_172, primals_174, primals_175, primals_177, primals_178, primals_180, primals_181, primals_183, primals_184, primals_186, primals_187, primals_189, primals_190, primals_192, primals_193, primals_195, primals_196, primals_198, primals_199, primals_201, primals_202, primals_204, primals_205, primals_207, primals_208, primals_210, primals_211, primals_213, primals_214, primals_216, primals_217, primals_219, primals_220, primals_222, primals_223, primals_225, primals_226, primals_228, primals_229, primals_231, primals_232, primals_234, primals_235, primals_237, primals_238, primals_240, primals_241, primals_243, primals_244, primals_246, primals_247, primals_249, primals_250, primals_252, primals_253, primals_255, primals_256, primals_258, primals_259, primals_261, primals_262, primals_264, primals_265, primals_267, primals_268, primals_270, primals_271, primals_273, primals_274, primals_276, primals_277, primals_279, primals_280, primals_282, primals_283, primals_285, primals_286, primals_288, primals_289, primals_291, primals_292, primals_294, primals_295, primals_297, primals_298, primals_300, primals_301, primals_303, primals_304, primals_306, primals_307, primals_309, primals_310, primals_312, primals_313, primals_315, primals_316, primals_318, primals_319, primals_321, buf0, buf1, buf2, buf3, buf4, buf5, buf6, buf7, buf8, buf9, buf11, buf12, buf13, buf14, buf15, buf16, buf17, buf18, buf19, buf20, buf21, buf22, buf23, buf24, buf25, buf26, buf27, buf28, buf29, buf31, buf32, buf33, buf34, buf35, buf36, buf37, buf38, buf39, buf40, buf41, buf42, buf43, buf44, buf45, buf46, buf47, buf48, buf49, buf50, buf51, buf52, buf53, buf54, buf55, buf57, buf58, buf59, buf60, buf61, buf62, buf63, buf64, buf65, buf66, buf67, buf68, buf69, buf70, buf71, buf72, buf73, buf74, buf75, buf76, buf77, buf78, buf79, buf80, buf81, buf82, buf83, buf84, buf85, buf86, buf87, buf88, buf89, buf90, buf91, buf92, buf93, buf95, buf96, buf97, buf98, buf99, buf100, buf101, buf102, buf103, buf104, buf105, buf106, as_strided(buf109, (2, 2048), (2048, 1)), buf110, buf112, as_strided(primals_160, (128, 2048), (2048, 1)), buf114, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
cuda train moco FAIL
Running torchbench.py nvidia_deeprecommender...
cuda train nvidia_deeprecommender PASS
Running torchbench.py pytorch_CycleGAN_and_pix2pix...
ERROR:common:[TensorBox(StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(exp(load(buf170, i3 + 256 * i2 + 65536 * i1) + load(primals_48, i1) * constant(-2.0, torch.float32)) + constant(1.0, torch.float32)) * constant(2.0, torch.float32) - constant(1.0, torch.float32),
ranges=torch.Size([1, 3, 256, 256]),
origins={sub_23}
)
)), TensorBox(StorageBox(
InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[64, 3, 7, 7], stride=[147, 49, 7, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_3', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_5', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 3, 3], stride=[1152, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_9', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_11', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_15', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_17', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_21', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_23', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_27', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_29', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_33', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_35', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_39', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_41', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 3, 3], stride=[1152, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_45', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_47', layout=FixedLayout('cuda', torch.float32, size=[3, 64, 7, 7], stride=[3136, 49, 7, 1]))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf0', layout=FixedLayout('cuda', torch.float32, size=[1, 3, 262, 262], stride=[205932, 68644, 262, 1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_49, constant(255, torch.int32) - abs(constant(255, torch.int32) - abs(index_expr(i3, torch.int32) - constant(3, torch.int32))) + constant(255, torch.int32) - abs(constant(255, torch.int32) - abs(index_expr(i2, torch.int32) - constant(3, torch.int32))) * s1 + i1 * s1**2),
ranges=[1, 3, 262, 262],
origins={reflection_pad2d}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf2', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 64, 256, 256]), stride=[4194304, 65536, 256, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf1, i3 + 256 * i2 + 65536 * i1) + load(primals_2, i1),
ranges=torch.Size([1, 64, 256, 256]),
origins={convolution}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf6, i1) / index_expr(65536, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 64, 1, 1],
origins={reciprocal}
)
),
size=(1, 64, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_3}
),
size=(1, 64),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_4}
),
size=(64,),
reindex=lambda i0: [0, i0],
origins={squeeze_5}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf9', layout=FixedLayout('cuda', torch.float32, size=torch.Size([1, 64, 256, 256]), stride=[4194304, 65536, 256, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf2, i3 + 256 * i2 + 65536 * i1) - load(buf8, i1) / index_expr(65536, torch.float32) * reciprocal(sqrt(load(buf6, i1) / index_expr(65536, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 64, 256, 256]),
origins={relu}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf11', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 128, 128, 128]), stride=[2097152, 16384, 128, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf10, i3 + 128 * i2 + 16384 * i1) + load(primals_4, i1),
ranges=torch.Size([1, 128, 128, 128]),
origins={convolution_1}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf15, i1) / index_expr(16384, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 128, 1, 1],
origins={reciprocal_1}
)
),
size=(1, 128, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_9}
),
size=(1, 128),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_10}
),
size=(128,),
reindex=lambda i0: [0, i0],
origins={squeeze_11}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf18', layout=FixedLayout('cuda', torch.float32, size=torch.Size([1, 128, 128, 128]), stride=[2097152, 16384, 128, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf11, i3 + 128 * i2 + 16384 * i1) - load(buf17, i1) / index_expr(16384, torch.float32) * reciprocal(sqrt(load(buf15, i1) / index_expr(16384, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 128, 128, 128]),
origins={relu_1}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf20', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf19, i3 + 64 * i2 + 4096 * i1) + load(primals_6, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_2}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf22, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_2}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_15}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_16}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_17}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf24', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf20, i3 + 64 * i2 + 4096 * i1) - load(buf23, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf22, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_2}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf25', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf24, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_1}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf27', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf26, i3 + 64 * i2 + 4096 * i1) + load(primals_8, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_3}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf29, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_3}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_21}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_22}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_23}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf31', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf27, i3 + 64 * i2 + 4096 * i1) - load(buf30, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf29, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_3}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf32', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf31, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_2}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf34', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf33, i3 + 64 * i2 + 4096 * i1) + load(primals_10, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_4}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf36, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_4}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_27}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_28}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_29}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf38', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf24, i3 + 64 * i2 + 4096 * i1) + load(buf34, i3 + 64 * i2 + 4096 * i1) - load(buf37, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf36, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_10}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf39', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf38, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_3}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf41', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf40, i3 + 64 * i2 + 4096 * i1) + load(primals_12, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_5}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf43, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_5}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_33}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_34}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_35}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf45', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf41, i3 + 64 * i2 + 4096 * i1) - load(buf44, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf43, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_4}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf46', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf45, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_4}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf48', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf47, i3 + 64 * i2 + 4096 * i1) + load(primals_14, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_6}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf50, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_6}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_39}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_40}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_41}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf52', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf38, i3 + 64 * i2 + 4096 * i1) + load(buf48, i3 + 64 * i2 + 4096 * i1) - load(buf51, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf50, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_15}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf53', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf52, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_5}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf55', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf54, i3 + 64 * i2 + 4096 * i1) + load(primals_16, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_7}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf57, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_7}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_45}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_46}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_47}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf59', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf55, i3 + 64 * i2 + 4096 * i1) - load(buf58, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf57, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_5}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf60', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf59, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_6}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf62', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf61, i3 + 64 * i2 + 4096 * i1) + load(primals_18, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_8}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf64, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_8}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_51}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_52}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_53}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf66', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf52, i3 + 64 * i2 + 4096 * i1) + load(buf62, i3 + 64 * i2 + 4096 * i1) - load(buf65, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf64, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_20}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf67', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf66, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_7}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf69', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf68, i3 + 64 * i2 + 4096 * i1) + load(primals_20, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_9}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf71, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_9}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_57}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_58}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_59}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf73', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf69, i3 + 64 * i2 + 4096 * i1) - load(buf72, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf71, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_6}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf74', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf73, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_8}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf76', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf75, i3 + 64 * i2 + 4096 * i1) + load(primals_22, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_10}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf78, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_10}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_63}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_64}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_65}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf80', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf66, i3 + 64 * i2 + 4096 * i1) + load(buf76, i3 + 64 * i2 + 4096 * i1) - load(buf79, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf78, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_25}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf81', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf80, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_9}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf83', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf82, i3 + 64 * i2 + 4096 * i1) + load(primals_24, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_11}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf85, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_11}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_69}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_70}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_71}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf87', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf83, i3 + 64 * i2 + 4096 * i1) - load(buf86, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf85, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_7}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf88', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf87, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_10}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf90', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf89, i3 + 64 * i2 + 4096 * i1) + load(primals_26, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_12}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf92, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_12}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_75}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_76}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_77}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf94', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf80, i3 + 64 * i2 + 4096 * i1) + load(buf90, i3 + 64 * i2 + 4096 * i1) - load(buf93, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf92, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_30}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf95', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf94, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_11}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf97', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf96, i3 + 64 * i2 + 4096 * i1) + load(primals_28, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_13}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf99, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_13}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_81}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_82}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_83}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf101', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf97, i3 + 64 * i2 + 4096 * i1) - load(buf100, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf99, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_8}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf102', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf101, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_12}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf104', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf103, i3 + 64 * i2 + 4096 * i1) + load(primals_30, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_14}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf106, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_14}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_87}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_88}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_89}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf108', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf94, i3 + 64 * i2 + 4096 * i1) + load(buf104, i3 + 64 * i2 + 4096 * i1) - load(buf107, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf106, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_35}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf109', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf108, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_13}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf111', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf110, i3 + 64 * i2 + 4096 * i1) + load(primals_32, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_15}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf113, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_15}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_93}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_94}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_95}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf115', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf111, i3 + 64 * i2 + 4096 * i1) - load(buf114, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf113, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_9}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf116', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf115, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_14}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf118', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf117, i3 + 64 * i2 + 4096 * i1) + load(primals_34, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_16}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf120, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_16}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_99}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_100}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_101}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf122', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf108, i3 + 64 * i2 + 4096 * i1) + load(buf118, i3 + 64 * i2 + 4096 * i1) - load(buf121, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf120, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_40}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf123', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf122, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_15}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf125', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf124, i3 + 64 * i2 + 4096 * i1) + load(primals_36, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_17}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf127, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_17}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_105}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_106}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_107}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf129', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf125, i3 + 64 * i2 + 4096 * i1) - load(buf128, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf127, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_10}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf130', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf129, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_16}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf132', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf131, i3 + 64 * i2 + 4096 * i1) + load(primals_38, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_18}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf134, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_18}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_111}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_112}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_113}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf136', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf122, i3 + 64 * i2 + 4096 * i1) + load(buf132, i3 + 64 * i2 + 4096 * i1) - load(buf135, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf134, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_45}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf137', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf136, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_17}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf139', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf138, i3 + 64 * i2 + 4096 * i1) + load(primals_40, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_19}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf141, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_19}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_117}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_118}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_119}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf143', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf139, i3 + 64 * i2 + 4096 * i1) - load(buf142, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf141, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_11}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf144', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf143, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_18}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf146', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf145, i3 + 64 * i2 + 4096 * i1) + load(primals_42, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_20}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf148, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_20}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_123}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_124}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_125}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf150', layout=FixedLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf136, i3 + 64 * i2 + 4096 * i1) + load(buf146, i3 + 64 * i2 + 4096 * i1) - load(buf149, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf148, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_50}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf152', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 128, 128, 128]), stride=[2097152, 16384, 128, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf151, i3 + 128 * i2 + 16384 * i1) + load(primals_44, i1),
ranges=torch.Size([1, 128, 128, 128]),
origins={convolution_21}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf156, i1) / index_expr(16384, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 128, 1, 1],
origins={reciprocal_21}
)
),
size=(1, 128, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_129}
),
size=(1, 128),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_130}
),
size=(128,),
reindex=lambda i0: [0, i0],
origins={squeeze_131}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf159', layout=FixedLayout('cuda', torch.float32, size=torch.Size([1, 128, 128, 128]), stride=[2097152, 16384, 128, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf152, i3 + 128 * i2 + 16384 * i1) - load(buf158, i1) / index_expr(16384, torch.float32) * reciprocal(sqrt(load(buf156, i1) / index_expr(16384, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 128, 128, 128]),
origins={relu_12}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf161', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 64, 256, 256]), stride=[4194304, 65536, 256, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf160, i3 + 256 * i2 + 65536 * i1) + load(primals_46, i1),
ranges=torch.Size([1, 64, 256, 256]),
origins={convolution_22}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf165, i1) / index_expr(65536, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 64, 1, 1],
origins={reciprocal_22}
)
),
size=(1, 64, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_135}
),
size=(1, 64),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_136}
),
size=(64,),
reindex=lambda i0: [0, i0],
origins={squeeze_137}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf168', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 64, 256, 256]), stride=[4194304, 65536, 256, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf161, i3 + 256 * i2 + 65536 * i1) - load(buf167, i1) / index_expr(65536, torch.float32) * reciprocal(sqrt(load(buf165, i1) / index_expr(65536, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 64, 256, 256]),
origins={relu_13}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf169', layout=FixedLayout('cuda', torch.float32, size=[1, 64, 262, 262], stride=[4393216, 68644, 262, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf168, constant(255, torch.int32) - abs(constant(255, torch.int32) - abs(index_expr(i3, torch.int32) - constant(3, torch.int32))) + 256 * constant(255, torch.int32) - abs(constant(255, torch.int32) - abs(index_expr(i2, torch.int32) - constant(3, torch.int32))) + 65536 * i1),
ranges=[1, 64, 262, 262],
origins={reflection_pad2d_19}
))
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(exp(load(buf170, i3 + 256 * i2 + 65536 * i1) + load(primals_48, i1) * constant(-2.0, torch.float32)) + constant(1.0, torch.float32)) * constant(2.0, torch.float32) - constant(1.0, torch.float32),
ranges=torch.Size([1, 3, 256, 256]),
origins={sub_23}
)
)), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf167, i1) / index_expr(65536, torch.float32),
ranges=[1, 64, 1, 1],
origins={mean_22}
)
),
size=(1, 64, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_132}
),
size=(1, 64),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_133}
),
size=(64,),
reindex=lambda i0: [0, i0],
origins={squeeze_134}
),
size=(1, 64),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_138}
),
size=(1, 64, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_139}
),
size=(1, 64, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_140}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf158, i1) / index_expr(16384, torch.float32),
ranges=[1, 128, 1, 1],
origins={mean_21}
)
),
size=(1, 128, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_126}
),
size=(1, 128),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_127}
),
size=(128,),
reindex=lambda i0: [0, i0],
origins={squeeze_128}
),
size=(1, 128),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_150}
),
size=(1, 128, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_151}
),
size=(1, 128, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_152}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf149, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_20}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_120}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_121}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_122}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_162}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_163}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_164}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf142, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_19}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_114}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_115}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_116}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_174}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_175}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_176}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf135, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_18}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_108}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_109}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_110}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_186}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_187}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_188}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf128, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_17}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_102}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_103}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_104}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_198}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_199}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_200}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf121, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_16}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_96}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_97}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_98}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_210}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_211}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_212}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf114, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_15}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_90}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_91}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_92}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_222}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_223}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_224}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf107, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_14}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_84}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_85}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_86}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_234}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_235}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_236}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf100, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_13}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_78}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_79}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_80}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_246}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_247}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_248}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf93, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_12}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_72}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_73}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_74}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_258}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_259}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_260}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf86, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_11}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_66}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_67}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_68}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_270}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_271}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_272}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf79, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_10}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_60}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_61}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_62}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_282}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_283}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_284}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf72, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_9}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_54}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_55}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_56}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_294}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_295}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_296}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf65, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_8}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_48}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_49}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_50}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_306}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_307}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_308}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf58, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_7}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_42}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_43}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_44}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_318}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_319}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_320}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf51, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_6}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_36}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_37}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_38}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_330}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_331}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_332}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf44, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_5}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_30}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_31}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_32}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_342}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_343}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_344}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf37, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_4}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_24}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_25}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_26}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_354}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_355}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_356}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf30, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_3}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_18}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_19}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_20}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_366}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_367}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_368}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf23, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_2}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_12}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_13}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_14}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_378}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_379}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_380}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf17, i1) / index_expr(16384, torch.float32),
ranges=[1, 128, 1, 1],
origins={mean_1}
)
),
size=(1, 128, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_6}
),
size=(1, 128),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_7}
),
size=(128,),
reindex=lambda i0: [0, i0],
origins={squeeze_8}
),
size=(1, 128),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_390}
),
size=(1, 128, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_391}
),
size=(1, 128, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_392}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf8, i1) / index_expr(65536, torch.float32),
ranges=[1, 64, 1, 1],
origins={mean}
)
),
size=(1, 64, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze}
),
size=(1, 64),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_1}
),
size=(64,),
reindex=lambda i0: [0, i0],
origins={squeeze_2}
),
size=(1, 64),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_402}
),
size=(1, 64, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_403}
),
size=(1, 64, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_404}
)
), 1, 64, 256, 256, 128, 128, 128, 256, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 128, 256, 256]
While executing return [sub_23, primals_1, primals_3, primals_5, primals_7, primals_9, primals_11, primals_13, primals_15, primals_17, primals_19, primals_21, primals_23, primals_25, primals_27, primals_29, primals_31, primals_33, primals_35, primals_37, primals_39, primals_41, primals_43, primals_45, primals_47, reflection_pad2d, convolution, squeeze_5, view_3, convolution_1, squeeze_11, view_9, convolution_2, squeeze_17, view_15, reflection_pad2d_1, convolution_3, squeeze_23, view_21, reflection_pad2d_2, convolution_4, squeeze_29, add_10, reflection_pad2d_3, convolution_5, squeeze_35, view_29, reflection_pad2d_4, convolution_6, squeeze_41, add_15, reflection_pad2d_5, convolution_7, squeeze_47, view_37, reflection_pad2d_6, convolution_8, squeeze_53, add_20, reflection_pad2d_7, convolution_9, squeeze_59, view_45, reflection_pad2d_8, convolution_10, squeeze_65, add_25, reflection_pad2d_9, convolution_11, squeeze_71, view_53, reflection_pad2d_10, convolution_12, squeeze_77, add_30, reflection_pad2d_11, convolution_13, squeeze_83, view_61, reflection_pad2d_12, convolution_14, squeeze_89, add_35, reflection_pad2d_13, convolution_15, squeeze_95, view_69, reflection_pad2d_14, convolution_16, squeeze_101, add_40, reflection_pad2d_15, convolution_17, squeeze_107, view_77, reflection_pad2d_16, convolution_18, squeeze_113, add_45, reflection_pad2d_17, convolution_19, squeeze_119, view_85, reflection_pad2d_18, convolution_20, squeeze_125, add_50, convolution_21, squeeze_131, view_93, convolution_22, squeeze_137, view_99, reflection_pad2d_19, sub_23, unsqueeze_140, unsqueeze_152, unsqueeze_164, unsqueeze_176, unsqueeze_188, unsqueeze_200, unsqueeze_212, unsqueeze_224, unsqueeze_236, unsqueeze_248, unsqueeze_260, unsqueeze_272, unsqueeze_284, unsqueeze_296, unsqueeze_308, unsqueeze_320, unsqueeze_332, unsqueeze_344, unsqueeze_356, unsqueeze_368, unsqueeze_380, unsqueeze_392, unsqueeze_404, sym_size, mul, sym_size_1, sym_size_2, mul_3, sym_size_3, sym_size_4, mul_6, sym_size_5, sym_size_6, sym_size_7, sym_size_8, sym_size_9, sym_size_10, sym_size_11, sym_size_12, sym_size_13, sym_size_14, sym_size_15, sym_size_16, sym_size_17, sym_size_18, sym_size_19, sym_size_20, sym_size_21, sym_size_22, sym_size_23, sym_size_24, sym_size_25, sym_size_26, sym_size_27, sym_size_28, sym_size_29, sym_size_30, sym_size_31, sym_size_32, sym_size_33, sym_size_34, sym_size_35, sym_size_36, sym_size_37, sym_size_38, sym_size_39, sym_size_40, sym_size_41, sym_size_42, sym_size_43, sym_size_44, sym_size_45, sym_size_46]
Original traceback:
None
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/models/networks.py", line 370, in forward
def forward(self, input):
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 516, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 351, in fw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 267, in output
assert all(
AssertionError: [TensorBox(StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(exp(load(buf170, i3 + 256 * i2 + 65536 * i1) + load(primals_48, i1) * constant(-2.0, torch.float32)) + constant(1.0, torch.float32)) * constant(2.0, torch.float32) - constant(1.0, torch.float32),
ranges=torch.Size([1, 3, 256, 256]),
origins={sub_23}
)
)), TensorBox(StorageBox(
InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[64, 3, 7, 7], stride=[147, 49, 7, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_3', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_5', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 3, 3], stride=[1152, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_9', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_11', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_15', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_17', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_21', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_23', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_27', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_29', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_33', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_35', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_39', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_41', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 3, 3], stride=[1152, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_45', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_47', layout=FixedLayout('cuda', torch.float32, size=[3, 64, 7, 7], stride=[3136, 49, 7, 1]))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf0', layout=FixedLayout('cuda', torch.float32, size=[1, 3, 262, 262], stride=[205932, 68644, 262, 1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_49, constant(255, torch.int32) - abs(constant(255, torch.int32) - abs(index_expr(i3, torch.int32) - constant(3, torch.int32))) + constant(255, torch.int32) - abs(constant(255, torch.int32) - abs(index_expr(i2, torch.int32) - constant(3, torch.int32))) * s1 + i1 * s1**2),
ranges=[1, 3, 262, 262],
origins={reflection_pad2d}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf2', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 64, 256, 256]), stride=[4194304, 65536, 256, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf1, i3 + 256 * i2 + 65536 * i1) + load(primals_2, i1),
ranges=torch.Size([1, 64, 256, 256]),
origins={convolution}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf6, i1) / index_expr(65536, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 64, 1, 1],
origins={reciprocal}
)
),
size=(1, 64, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_3}
),
size=(1, 64),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_4}
),
size=(64,),
reindex=lambda i0: [0, i0],
origins={squeeze_5}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf9', layout=FixedLayout('cuda', torch.float32, size=torch.Size([1, 64, 256, 256]), stride=[4194304, 65536, 256, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf2, i3 + 256 * i2 + 65536 * i1) - load(buf8, i1) / index_expr(65536, torch.float32) * reciprocal(sqrt(load(buf6, i1) / index_expr(65536, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 64, 256, 256]),
origins={relu}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf11', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 128, 128, 128]), stride=[2097152, 16384, 128, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf10, i3 + 128 * i2 + 16384 * i1) + load(primals_4, i1),
ranges=torch.Size([1, 128, 128, 128]),
origins={convolution_1}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf15, i1) / index_expr(16384, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 128, 1, 1],
origins={reciprocal_1}
)
),
size=(1, 128, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_9}
),
size=(1, 128),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_10}
),
size=(128,),
reindex=lambda i0: [0, i0],
origins={squeeze_11}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf18', layout=FixedLayout('cuda', torch.float32, size=torch.Size([1, 128, 128, 128]), stride=[2097152, 16384, 128, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf11, i3 + 128 * i2 + 16384 * i1) - load(buf17, i1) / index_expr(16384, torch.float32) * reciprocal(sqrt(load(buf15, i1) / index_expr(16384, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 128, 128, 128]),
origins={relu_1}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf20', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf19, i3 + 64 * i2 + 4096 * i1) + load(primals_6, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_2}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf22, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_2}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_15}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_16}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_17}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf24', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf20, i3 + 64 * i2 + 4096 * i1) - load(buf23, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf22, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_2}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf25', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf24, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_1}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf27', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf26, i3 + 64 * i2 + 4096 * i1) + load(primals_8, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_3}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf29, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_3}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_21}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_22}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_23}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf31', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf27, i3 + 64 * i2 + 4096 * i1) - load(buf30, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf29, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_3}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf32', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf31, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_2}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf34', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf33, i3 + 64 * i2 + 4096 * i1) + load(primals_10, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_4}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf36, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_4}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_27}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_28}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_29}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf38', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf24, i3 + 64 * i2 + 4096 * i1) + load(buf34, i3 + 64 * i2 + 4096 * i1) - load(buf37, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf36, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_10}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf39', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf38, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_3}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf41', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf40, i3 + 64 * i2 + 4096 * i1) + load(primals_12, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_5}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf43, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_5}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_33}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_34}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_35}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf45', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf41, i3 + 64 * i2 + 4096 * i1) - load(buf44, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf43, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_4}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf46', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf45, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_4}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf48', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf47, i3 + 64 * i2 + 4096 * i1) + load(primals_14, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_6}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf50, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_6}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_39}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_40}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_41}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf52', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf38, i3 + 64 * i2 + 4096 * i1) + load(buf48, i3 + 64 * i2 + 4096 * i1) - load(buf51, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf50, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_15}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf53', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf52, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_5}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf55', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf54, i3 + 64 * i2 + 4096 * i1) + load(primals_16, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_7}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf57, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_7}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_45}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_46}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_47}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf59', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf55, i3 + 64 * i2 + 4096 * i1) - load(buf58, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf57, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_5}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf60', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf59, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_6}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf62', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf61, i3 + 64 * i2 + 4096 * i1) + load(primals_18, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_8}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf64, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_8}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_51}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_52}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_53}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf66', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf52, i3 + 64 * i2 + 4096 * i1) + load(buf62, i3 + 64 * i2 + 4096 * i1) - load(buf65, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf64, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_20}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf67', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf66, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_7}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf69', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf68, i3 + 64 * i2 + 4096 * i1) + load(primals_20, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_9}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf71, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_9}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_57}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_58}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_59}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf73', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf69, i3 + 64 * i2 + 4096 * i1) - load(buf72, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf71, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_6}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf74', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf73, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_8}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf76', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf75, i3 + 64 * i2 + 4096 * i1) + load(primals_22, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_10}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf78, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_10}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_63}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_64}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_65}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf80', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf66, i3 + 64 * i2 + 4096 * i1) + load(buf76, i3 + 64 * i2 + 4096 * i1) - load(buf79, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf78, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_25}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf81', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf80, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_9}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf83', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf82, i3 + 64 * i2 + 4096 * i1) + load(primals_24, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_11}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf85, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_11}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_69}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_70}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_71}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf87', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf83, i3 + 64 * i2 + 4096 * i1) - load(buf86, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf85, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_7}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf88', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf87, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_10}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf90', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf89, i3 + 64 * i2 + 4096 * i1) + load(primals_26, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_12}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf92, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_12}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_75}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_76}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_77}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf94', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf80, i3 + 64 * i2 + 4096 * i1) + load(buf90, i3 + 64 * i2 + 4096 * i1) - load(buf93, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf92, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_30}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf95', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf94, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_11}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf97', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf96, i3 + 64 * i2 + 4096 * i1) + load(primals_28, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_13}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf99, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_13}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_81}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_82}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_83}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf101', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf97, i3 + 64 * i2 + 4096 * i1) - load(buf100, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf99, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_8}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf102', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf101, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_12}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf104', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf103, i3 + 64 * i2 + 4096 * i1) + load(primals_30, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_14}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf106, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_14}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_87}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_88}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_89}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf108', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf94, i3 + 64 * i2 + 4096 * i1) + load(buf104, i3 + 64 * i2 + 4096 * i1) - load(buf107, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf106, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_35}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf109', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf108, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_13}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf111', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf110, i3 + 64 * i2 + 4096 * i1) + load(primals_32, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_15}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf113, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_15}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_93}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_94}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_95}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf115', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf111, i3 + 64 * i2 + 4096 * i1) - load(buf114, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf113, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_9}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf116', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf115, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_14}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf118', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf117, i3 + 64 * i2 + 4096 * i1) + load(primals_34, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_16}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf120, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_16}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_99}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_100}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_101}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf122', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf108, i3 + 64 * i2 + 4096 * i1) + load(buf118, i3 + 64 * i2 + 4096 * i1) - load(buf121, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf120, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_40}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf123', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf122, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_15}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf125', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf124, i3 + 64 * i2 + 4096 * i1) + load(primals_36, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_17}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf127, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_17}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_105}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_106}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_107}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf129', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf125, i3 + 64 * i2 + 4096 * i1) - load(buf128, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf127, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_10}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf130', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf129, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_16}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf132', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf131, i3 + 64 * i2 + 4096 * i1) + load(primals_38, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_18}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf134, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_18}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_111}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_112}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_113}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf136', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf122, i3 + 64 * i2 + 4096 * i1) + load(buf132, i3 + 64 * i2 + 4096 * i1) - load(buf135, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf134, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_45}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf137', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf136, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_17}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf139', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf138, i3 + 64 * i2 + 4096 * i1) + load(primals_40, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_19}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf141, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_19}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_117}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_118}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_119}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf143', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf139, i3 + 64 * i2 + 4096 * i1) - load(buf142, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf141, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 256, 64, 64]),
origins={relu_11}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf144', layout=FixedLayout('cuda', torch.float32, size=[1, 256, 66, 66], stride=[1115136, 4356, 66, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf143, constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i3, torch.int32) - constant(1, torch.int32))) + 64 * constant(63, torch.int32) - abs(constant(63, torch.int32) - abs(index_expr(i2, torch.int32) - constant(1, torch.int32))) + 4096 * i1),
ranges=[1, 256, 66, 66],
origins={reflection_pad2d_18}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf146', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf145, i3 + 64 * i2 + 4096 * i1) + load(primals_42, i1),
ranges=torch.Size([1, 256, 64, 64]),
origins={convolution_20}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf148, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 256, 1, 1],
origins={reciprocal_20}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_123}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_124}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_125}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf150', layout=FixedLayout('cuda', torch.float32, size=torch.Size([1, 256, 64, 64]), stride=[1048576, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf136, i3 + 64 * i2 + 4096 * i1) + load(buf146, i3 + 64 * i2 + 4096 * i1) - load(buf149, i1) / index_expr(4096, torch.float32) * reciprocal(sqrt(load(buf148, i1) / index_expr(4096, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32),
ranges=torch.Size([1, 256, 64, 64]),
origins={add_50}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf152', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 128, 128, 128]), stride=[2097152, 16384, 128, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf151, i3 + 128 * i2 + 16384 * i1) + load(primals_44, i1),
ranges=torch.Size([1, 128, 128, 128]),
origins={convolution_21}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf156, i1) / index_expr(16384, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 128, 1, 1],
origins={reciprocal_21}
)
),
size=(1, 128, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_129}
),
size=(1, 128),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_130}
),
size=(128,),
reindex=lambda i0: [0, i0],
origins={squeeze_131}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf159', layout=FixedLayout('cuda', torch.float32, size=torch.Size([1, 128, 128, 128]), stride=[2097152, 16384, 128, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf152, i3 + 128 * i2 + 16384 * i1) - load(buf158, i1) / index_expr(16384, torch.float32) * reciprocal(sqrt(load(buf156, i1) / index_expr(16384, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 128, 128, 128]),
origins={relu_12}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf161', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 64, 256, 256]), stride=[4194304, 65536, 256, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf160, i3 + 256 * i2 + 65536 * i1) + load(primals_46, i1),
ranges=torch.Size([1, 64, 256, 256]),
origins={convolution_22}
))
)), TensorBox(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(sqrt(load(buf165, i1) / index_expr(65536, torch.float32) + constant(1e-05, torch.float32))),
ranges=[1, 64, 1, 1],
origins={reciprocal_22}
)
),
size=(1, 64, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_135}
),
size=(1, 64),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_136}
),
size=(64,),
reindex=lambda i0: [0, i0],
origins={squeeze_137}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf168', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([1, 64, 256, 256]), stride=[4194304, 65536, 256, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf161, i3 + 256 * i2 + 65536 * i1) - load(buf167, i1) / index_expr(65536, torch.float32) * reciprocal(sqrt(load(buf165, i1) / index_expr(65536, torch.float32) + constant(1e-05, torch.float32))) * constant(1, torch.float32) + constant(0, torch.float32)),
ranges=torch.Size([1, 64, 256, 256]),
origins={relu_13}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf169', layout=FixedLayout('cuda', torch.float32, size=[1, 64, 262, 262], stride=[4393216, 68644, 262, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf168, constant(255, torch.int32) - abs(constant(255, torch.int32) - abs(index_expr(i3, torch.int32) - constant(3, torch.int32))) + 256 * constant(255, torch.int32) - abs(constant(255, torch.int32) - abs(index_expr(i2, torch.int32) - constant(3, torch.int32))) + 65536 * i1),
ranges=[1, 64, 262, 262],
origins={reflection_pad2d_19}
))
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(exp(load(buf170, i3 + 256 * i2 + 65536 * i1) + load(primals_48, i1) * constant(-2.0, torch.float32)) + constant(1.0, torch.float32)) * constant(2.0, torch.float32) - constant(1.0, torch.float32),
ranges=torch.Size([1, 3, 256, 256]),
origins={sub_23}
)
)), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf167, i1) / index_expr(65536, torch.float32),
ranges=[1, 64, 1, 1],
origins={mean_22}
)
),
size=(1, 64, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_132}
),
size=(1, 64),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_133}
),
size=(64,),
reindex=lambda i0: [0, i0],
origins={squeeze_134}
),
size=(1, 64),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_138}
),
size=(1, 64, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_139}
),
size=(1, 64, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_140}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf158, i1) / index_expr(16384, torch.float32),
ranges=[1, 128, 1, 1],
origins={mean_21}
)
),
size=(1, 128, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_126}
),
size=(1, 128),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_127}
),
size=(128,),
reindex=lambda i0: [0, i0],
origins={squeeze_128}
),
size=(1, 128),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_150}
),
size=(1, 128, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_151}
),
size=(1, 128, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_152}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf149, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_20}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_120}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_121}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_122}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_162}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_163}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_164}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf142, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_19}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_114}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_115}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_116}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_174}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_175}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_176}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf135, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_18}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_108}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_109}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_110}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_186}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_187}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_188}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf128, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_17}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_102}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_103}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_104}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_198}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_199}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_200}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf121, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_16}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_96}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_97}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_98}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_210}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_211}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_212}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf114, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_15}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_90}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_91}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_92}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_222}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_223}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_224}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf107, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_14}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_84}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_85}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_86}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_234}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_235}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_236}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf100, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_13}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_78}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_79}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_80}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_246}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_247}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_248}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf93, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_12}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_72}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_73}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_74}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_258}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_259}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_260}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf86, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_11}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_66}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_67}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_68}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_270}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_271}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_272}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf79, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_10}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_60}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_61}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_62}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_282}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_283}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_284}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf72, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_9}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_54}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_55}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_56}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_294}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_295}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_296}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf65, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_8}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_48}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_49}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_50}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_306}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_307}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_308}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf58, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_7}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_42}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_43}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_44}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_318}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_319}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_320}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf51, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_6}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_36}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_37}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_38}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_330}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_331}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_332}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf44, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_5}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_30}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_31}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_32}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_342}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_343}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_344}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf37, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_4}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_24}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_25}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_26}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_354}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_355}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_356}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf30, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_3}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_18}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_19}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_20}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_366}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_367}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_368}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf23, i1) / index_expr(4096, torch.float32),
ranges=[1, 256, 1, 1],
origins={mean_2}
)
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_12}
),
size=(1, 256),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_13}
),
size=(256,),
reindex=lambda i0: [0, i0],
origins={squeeze_14}
),
size=(1, 256),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_378}
),
size=(1, 256, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_379}
),
size=(1, 256, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_380}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf17, i1) / index_expr(16384, torch.float32),
ranges=[1, 128, 1, 1],
origins={mean_1}
)
),
size=(1, 128, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze_6}
),
size=(1, 128),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_7}
),
size=(128,),
reindex=lambda i0: [0, i0],
origins={squeeze_8}
),
size=(1, 128),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_390}
),
size=(1, 128, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_391}
),
size=(1, 128, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_392}
)
), TensorBox(
View(
View(
View(
View(
View(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf8, i1) / index_expr(65536, torch.float32),
ranges=[1, 64, 1, 1],
origins={mean}
)
),
size=(1, 64, 1),
reindex=lambda i0, i1, i2: [0, i1, 0, 0],
origins={squeeze}
),
size=(1, 64),
reindex=lambda i0, i1: [0, i1, 0],
origins={squeeze_1}
),
size=(64,),
reindex=lambda i0: [0, i0],
origins={squeeze_2}
),
size=(1, 64),
reindex=lambda i0, i1: [i1],
origins={unsqueeze_402}
),
size=(1, 64, 1),
reindex=lambda i0, i1, i2: [0, i1],
origins={unsqueeze_403}
),
size=(1, 64, 1, 1),
reindex=lambda i0, i1, i2, i3: [0, i1, 0],
origins={unsqueeze_404}
)
), 1, 64, 256, 256, 128, 128, 128, 256, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 128, 256, 256]
While executing return [sub_23, primals_1, primals_3, primals_5, primals_7, primals_9, primals_11, primals_13, primals_15, primals_17, primals_19, primals_21, primals_23, primals_25, primals_27, primals_29, primals_31, primals_33, primals_35, primals_37, primals_39, primals_41, primals_43, primals_45, primals_47, reflection_pad2d, convolution, squeeze_5, view_3, convolution_1, squeeze_11, view_9, convolution_2, squeeze_17, view_15, reflection_pad2d_1, convolution_3, squeeze_23, view_21, reflection_pad2d_2, convolution_4, squeeze_29, add_10, reflection_pad2d_3, convolution_5, squeeze_35, view_29, reflection_pad2d_4, convolution_6, squeeze_41, add_15, reflection_pad2d_5, convolution_7, squeeze_47, view_37, reflection_pad2d_6, convolution_8, squeeze_53, add_20, reflection_pad2d_7, convolution_9, squeeze_59, view_45, reflection_pad2d_8, convolution_10, squeeze_65, add_25, reflection_pad2d_9, convolution_11, squeeze_71, view_53, reflection_pad2d_10, convolution_12, squeeze_77, add_30, reflection_pad2d_11, convolution_13, squeeze_83, view_61, reflection_pad2d_12, convolution_14, squeeze_89, add_35, reflection_pad2d_13, convolution_15, squeeze_95, view_69, reflection_pad2d_14, convolution_16, squeeze_101, add_40, reflection_pad2d_15, convolution_17, squeeze_107, view_77, reflection_pad2d_16, convolution_18, squeeze_113, add_45, reflection_pad2d_17, convolution_19, squeeze_119, view_85, reflection_pad2d_18, convolution_20, squeeze_125, add_50, convolution_21, squeeze_131, view_93, convolution_22, squeeze_137, view_99, reflection_pad2d_19, sub_23, unsqueeze_140, unsqueeze_152, unsqueeze_164, unsqueeze_176, unsqueeze_188, unsqueeze_200, unsqueeze_212, unsqueeze_224, unsqueeze_236, unsqueeze_248, unsqueeze_260, unsqueeze_272, unsqueeze_284, unsqueeze_296, unsqueeze_308, unsqueeze_320, unsqueeze_332, unsqueeze_344, unsqueeze_356, unsqueeze_368, unsqueeze_380, unsqueeze_392, unsqueeze_404, sym_size, mul, sym_size_1, sym_size_2, mul_3, sym_size_3, sym_size_4, mul_6, sym_size_5, sym_size_6, sym_size_7, sym_size_8, sym_size_9, sym_size_10, sym_size_11, sym_size_12, sym_size_13, sym_size_14, sym_size_15, sym_size_16, sym_size_17, sym_size_18, sym_size_19, sym_size_20, sym_size_21, sym_size_22, sym_size_23, sym_size_24, sym_size_25, sym_size_26, sym_size_27, sym_size_28, sym_size_29, sym_size_30, sym_size_31, sym_size_32, sym_size_33, sym_size_34, sym_size_35, sym_size_36, sym_size_37, sym_size_38, sym_size_39, sym_size_40, sym_size_41, sym_size_42, sym_size_43, sym_size_44, sym_size_45, sym_size_46]
Original traceback:
None
--dataroot /scratch/ezyang/work/torchbenchmark/torchbenchmark/data/.data/pytorch_CycleGAN_and_pix2pix_inputs/datasets/horse2zebra --name horse2zebra --model cycle_gan --display_id 0 --n_epochs 3 --n_epochs_decay 3 --gpu_ids 0 --checkpoints_dir /scratch/ezyang/work/torchbenchmark/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/.data/checkpoints
TorchDynamo optimized model failed to run because of following error
cuda train pytorch_CycleGAN_and_pix2pix FAIL
Running torchbench.py pytorch_stargan...
ERROR:common:[TensorBox(StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(exp(load(buf88, i3 + 128 * i2 + 16384 * i1 + 49152 * i0) * constant(-2.0, torch.float32)) + constant(1.0, torch.float32)) * constant(2.0, torch.float32) - constant(1.0, torch.float32),
ranges=torch.Size([16, 3, 128, 128]),
origins={sub_17}
)
)), TensorBox(StorageBox(
InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[64, 8, 7, 7], stride=[392, 49, 7, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_4', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 4, 4], stride=[1024, 16, 4, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 4, 4], stride=[2048, 16, 4, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_10', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_16', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_22', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_28', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_34', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_40', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_46', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 4, 4], stride=[2048, 16, 4, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 4, 4], stride=[1024, 16, 4, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_52', layout=FixedLayout('cuda', torch.float32, size=[3, 64, 7, 7], stride=[3136, 49, 7, 1]))
)), TensorBox(StorageBox(
ConcatKernel(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[s0, 8, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), inputs=[ComputedBuffer(name='buf0', layout=AliasedLayout('cuda', torch.float32, size=[s0, s1, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_104, i3 + i1 * s2**2 + i2 * s2 + i0 * s1 * s2**2),
ranges=[s0, s1, s2, s2],
origins={cat}
)), ComputedBuffer(name='buf1', layout=AliasedLayout('cuda', torch.float32, size=[s0, 8 - s1, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_105, i1 + i0 * 8 + -1 * s1),
ranges=[s0, 8 - s1, s2, s2],
origins={cat}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf4', layout=FixedLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_2, ModularIndexing(i0, 1, 64)),
ranges=[64*s0],
origins={repeat_1}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf5', layout=FixedLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_53, ModularIndexing(i0, 1, 64)),
ranges=[64*s0],
origins={repeat_3}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf6', layout=FlexibleLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_54, ModularIndexing(i0, 1, 64)),
ranges=[64*s0],
origins={repeat_4}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf3,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 64, 128, 128]), stride=[1048576, 16384, 128, 1]),
inputs=[ConcatKernel(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[s0, 8, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), inputs=[ComputedBuffer(name='buf0', layout=AliasedLayout('cuda', torch.float32, size=[s0, s1, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_104, i3 + i1 * s2**2 + i2 * s2 + i0 * s1 * s2**2),
ranges=[s0, s1, s2, s2],
origins={cat}
)), ComputedBuffer(name='buf1', layout=AliasedLayout('cuda', torch.float32, size=[s0, 8 - s1, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_105, i1 + i0 * 8 + -1 * s1),
ranges=[s0, 8 - s1, s2, s2],
origins={cat}
))]), InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[64, 8, 7, 7], stride=[392, 49, 7, 1]))],
constant_args=(None, (1, 1), (3, 3), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution}
)
),
size=(1, 1024, 128, 128),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 64, 16), ModularIndexing(i1, 1, 64), i2, i3],
origins={view_1}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf7', layout=FixedLayout('cuda', torch.float32, size=(16, 64, 128, 128), stride=[1048576, 16384, 128, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf3, i3 + 128 * i2 + 16384 * ModularIndexing(i1, 1, 64) + 1048576 * ModularIndexing(i1 + 64 * i0, 64, 16)) - load(buf5, i1 + 64 * i0) * reciprocal(sqrt(load(buf6, i1 + 64 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf4, i1 + 64 * i0) + load(primals_3, ModularIndexing(i1, 1, 64))),
ranges=(16, 64, 128, 128),
origins={relu}
))
),
size=(1, 1024, 128, 128),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 64, 16), ModularIndexing(i1, 1, 64), i2, i3],
origins={view_5}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf9', layout=FixedLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_5, ModularIndexing(i0, 1, 128)),
ranges=[128*s0],
origins={repeat_5}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf10', layout=FixedLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_56, ModularIndexing(i0, 1, 128)),
ranges=[128*s0],
origins={repeat_7}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf11', layout=FlexibleLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_57, ModularIndexing(i0, 1, 128)),
ranges=[128*s0],
origins={repeat_8}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf8,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 128, 64, 64]), stride=[524288, 4096, 64, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf7', layout=FixedLayout('cuda', torch.float32, size=(16, 64, 128, 128), stride=[1048576, 16384, 128, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf3, i3 + 128 * i2 + 16384 * ModularIndexing(i1, 1, 64) + 1048576 * ModularIndexing(i1 + 64 * i0, 64, 16)) - load(buf5, i1 + 64 * i0) * reciprocal(sqrt(load(buf6, i1 + 64 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf4, i1 + 64 * i0) + load(primals_3, ModularIndexing(i1, 1, 64))),
ranges=(16, 64, 128, 128),
origins={relu}
))
),
size=(1, 1024, 128, 128),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 64, 16), ModularIndexing(i1, 1, 64), i2, i3],
origins={view_5}
),
FixedLayout('cuda', torch.float32, size=(16, 64, 128, 128), stride=[1048576, 16384, 128, 1]),
origins={convolution_1}
), InputBuffer(name='primals_4', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 4, 4], stride=[1024, 16, 4, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_1}
)
),
size=(1, 2048, 64, 64),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 128, 16), ModularIndexing(i1, 1, 128), i2, i3],
origins={view_9}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf12', layout=FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf8, i3 + 64 * i2 + 4096 * ModularIndexing(i1, 1, 128) + 524288 * ModularIndexing(i1 + 128 * i0, 128, 16)) - load(buf10, i1 + 128 * i0) * reciprocal(sqrt(load(buf11, i1 + 128 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf9, i1 + 128 * i0) + load(primals_6, ModularIndexing(i1, 1, 128))),
ranges=(16, 128, 64, 64),
origins={relu_1}
))
),
size=(1, 2048, 64, 64),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 128, 16), ModularIndexing(i1, 1, 128), i2, i3],
origins={view_13}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf14', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_8, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_9}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf15', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_59, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_11}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf16', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_60, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_12}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf13,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf12', layout=FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf8, i3 + 64 * i2 + 4096 * ModularIndexing(i1, 1, 128) + 524288 * ModularIndexing(i1 + 128 * i0, 128, 16)) - load(buf10, i1 + 128 * i0) * reciprocal(sqrt(load(buf11, i1 + 128 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf9, i1 + 128 * i0) + load(primals_6, ModularIndexing(i1, 1, 128))),
ranges=(16, 128, 64, 64),
origins={relu_1}
))
),
size=(1, 2048, 64, 64),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 128, 16), ModularIndexing(i1, 1, 128), i2, i3],
origins={view_13}
),
FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]),
origins={convolution_2}
), InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 4, 4], stride=[2048, 16, 4, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_2}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_17}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf17', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf13, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf15, i1 + 256 * i0) * reciprocal(sqrt(load(buf16, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf14, i1 + 256 * i0) + load(primals_9, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_2}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_21}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf19', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_11, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_13}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf20', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_62, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_15}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf21', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_63, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_16}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf18,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf17', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf13, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf15, i1 + 256 * i0) * reciprocal(sqrt(load(buf16, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf14, i1 + 256 * i0) + load(primals_9, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_2}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_21}
),
FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]),
origins={convolution_3}
), InputBuffer(name='primals_10', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_3}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_25}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf22', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf18, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf20, i1 + 256 * i0) * reciprocal(sqrt(load(buf21, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf19, i1 + 256 * i0) + load(primals_12, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_3}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_29}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf24', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_14, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_17}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf25', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_65, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_19}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf26', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_66, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_20}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf23,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf22', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf18, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf20, i1 + 256 * i0) * reciprocal(sqrt(load(buf21, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf19, i1 + 256 * i0) + load(primals_12, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_3}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_29}
),
FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]),
origins={convolution_4}
), InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_4}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_33}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf27', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf17, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) + load(buf23, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf25, i1 + 256 * i0) * reciprocal(sqrt(load(buf26, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf24, i1 + 256 * i0) + load(primals_15, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_10}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf29', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_17, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_21}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf30', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_68, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_23}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf31', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_69, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_24}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf28,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ComputedBuffer(name='buf27', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf17, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) + load(buf23, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf25, i1 + 256 * i0) * reciprocal(sqrt(load(buf26, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf24, i1 + 256 * i0) + load(primals_15, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_10}
)), InputBuffer(name='primals_16', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_5}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_37}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf32', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf28, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf30, i1 + 256 * i0) * reciprocal(sqrt(load(buf31, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf29, i1 + 256 * i0) + load(primals_18, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_4}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_41}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf34', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_20, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_25}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf35', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_71, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_27}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf36', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_72, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_28}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf33,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf32', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf28, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf30, i1 + 256 * i0) * reciprocal(sqrt(load(buf31, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf29, i1 + 256 * i0) + load(primals_18, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_4}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_41}
),
FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]),
origins={convolution_6}
), InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_6}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_45}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf37', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf27, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf33, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf35, i1 + 256 * i0) * reciprocal(sqrt(load(buf36, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf34, i1 + 256 * i0) + load(primals_21, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_15}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf39', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_23, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_29}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf40', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_74, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_31}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf41', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_75, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_32}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf38,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ComputedBuffer(name='buf37', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf27, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf33, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf35, i1 + 256 * i0) * reciprocal(sqrt(load(buf36, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf34, i1 + 256 * i0) + load(primals_21, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_15}
)), InputBuffer(name='primals_22', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_7}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_49}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf42', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf38, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf40, i1 + 256 * i0) * reciprocal(sqrt(load(buf41, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf39, i1 + 256 * i0) + load(primals_24, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_5}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_53}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf44', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_26, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_33}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf45', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_77, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_35}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf46', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_78, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_36}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf43,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf42', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf38, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf40, i1 + 256 * i0) * reciprocal(sqrt(load(buf41, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf39, i1 + 256 * i0) + load(primals_24, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_5}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_53}
),
FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]),
origins={convolution_8}
), InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_8}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_57}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf47', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf37, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf43, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf45, i1 + 256 * i0) * reciprocal(sqrt(load(buf46, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf44, i1 + 256 * i0) + load(primals_27, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_20}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf49', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_29, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_37}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf50', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_80, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_39}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf51', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_81, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_40}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf48,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ComputedBuffer(name='buf47', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf37, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf43, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf45, i1 + 256 * i0) * reciprocal(sqrt(load(buf46, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf44, i1 + 256 * i0) + load(primals_27, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_20}
)), InputBuffer(name='primals_28', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_9}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_61}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf52', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf48, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf50, i1 + 256 * i0) * reciprocal(sqrt(load(buf51, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf49, i1 + 256 * i0) + load(primals_30, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_6}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_65}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf54', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_32, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_41}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf55', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_83, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_43}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf56', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_84, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_44}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf53,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf52', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf48, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf50, i1 + 256 * i0) * reciprocal(sqrt(load(buf51, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf49, i1 + 256 * i0) + load(primals_30, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_6}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_65}
),
FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]),
origins={convolution_10}
), InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_10}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_69}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf57', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf47, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf53, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf55, i1 + 256 * i0) * reciprocal(sqrt(load(buf56, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf54, i1 + 256 * i0) + load(primals_33, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_25}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf59', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_35, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_45}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf60', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_86, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_47}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf61', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_87, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_48}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf58,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ComputedBuffer(name='buf57', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf47, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf53, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf55, i1 + 256 * i0) * reciprocal(sqrt(load(buf56, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf54, i1 + 256 * i0) + load(primals_33, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_25}
)), InputBuffer(name='primals_34', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_11}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_73}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf62', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf58, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf60, i1 + 256 * i0) * reciprocal(sqrt(load(buf61, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf59, i1 + 256 * i0) + load(primals_36, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_7}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_77}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf64', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_38, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_49}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf65', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_89, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_51}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf66', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_90, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_52}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf63,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf62', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf58, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf60, i1 + 256 * i0) * reciprocal(sqrt(load(buf61, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf59, i1 + 256 * i0) + load(primals_36, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_7}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_77}
),
FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]),
origins={convolution_12}
), InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_12}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_81}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf67', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf57, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf63, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf65, i1 + 256 * i0) * reciprocal(sqrt(load(buf66, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf64, i1 + 256 * i0) + load(primals_39, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_30}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf69', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_41, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_53}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf70', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_92, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_55}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf71', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_93, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_56}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf68,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ComputedBuffer(name='buf67', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf57, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf63, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf65, i1 + 256 * i0) * reciprocal(sqrt(load(buf66, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf64, i1 + 256 * i0) + load(primals_39, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_30}
)), InputBuffer(name='primals_40', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_13}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_85}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf72', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf68, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf70, i1 + 256 * i0) * reciprocal(sqrt(load(buf71, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf69, i1 + 256 * i0) + load(primals_42, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_8}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_89}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf74', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_44, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_57}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf75', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_95, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_59}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf76', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_96, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_60}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf73,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf72', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf68, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf70, i1 + 256 * i0) * reciprocal(sqrt(load(buf71, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf69, i1 + 256 * i0) + load(primals_42, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_8}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_89}
),
FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]),
origins={convolution_14}
), InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_14}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_93}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf77', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf67, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf73, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf75, i1 + 256 * i0) * reciprocal(sqrt(load(buf76, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf74, i1 + 256 * i0) + load(primals_45, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_35}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf79', layout=FixedLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_47, ModularIndexing(i0, 1, 128)),
ranges=[128*s0],
origins={repeat_61}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf80', layout=FixedLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_98, ModularIndexing(i0, 1, 128)),
ranges=[128*s0],
origins={repeat_63}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf81', layout=FlexibleLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_99, ModularIndexing(i0, 1, 128)),
ranges=[128*s0],
origins={repeat_64}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf78,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 128, 64, 64]), stride=[524288, 4096, 64, 1]),
inputs=[ComputedBuffer(name='buf77', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf67, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf73, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf75, i1 + 256 * i0) * reciprocal(sqrt(load(buf76, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf74, i1 + 256 * i0) + load(primals_45, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_35}
)), InputBuffer(name='primals_46', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 4, 4], stride=[2048, 16, 4, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), True, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_15}
)
),
size=(1, 2048, 64, 64),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 128, 16), ModularIndexing(i1, 1, 128), i2, i3],
origins={view_97}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf82', layout=FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf78, i3 + 64 * i2 + 4096 * ModularIndexing(i1, 1, 128) + 524288 * ModularIndexing(i1 + 128 * i0, 128, 16)) - load(buf80, i1 + 128 * i0) * reciprocal(sqrt(load(buf81, i1 + 128 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf79, i1 + 128 * i0) + load(primals_48, ModularIndexing(i1, 1, 128))),
ranges=(16, 128, 64, 64),
origins={relu_9}
))
),
size=(1, 2048, 64, 64),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 128, 16), ModularIndexing(i1, 1, 128), i2, i3],
origins={view_101}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf84', layout=FixedLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_50, ModularIndexing(i0, 1, 64)),
ranges=[64*s0],
origins={repeat_65}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf85', layout=FixedLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_101, ModularIndexing(i0, 1, 64)),
ranges=[64*s0],
origins={repeat_67}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf86', layout=FlexibleLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_102, ModularIndexing(i0, 1, 64)),
ranges=[64*s0],
origins={repeat_68}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf83,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 64, 128, 128]), stride=[1048576, 16384, 128, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf82', layout=FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf78, i3 + 64 * i2 + 4096 * ModularIndexing(i1, 1, 128) + 524288 * ModularIndexing(i1 + 128 * i0, 128, 16)) - load(buf80, i1 + 128 * i0) * reciprocal(sqrt(load(buf81, i1 + 128 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf79, i1 + 128 * i0) + load(primals_48, ModularIndexing(i1, 1, 128))),
ranges=(16, 128, 64, 64),
origins={relu_9}
))
),
size=(1, 2048, 64, 64),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 128, 16), ModularIndexing(i1, 1, 128), i2, i3],
origins={view_101}
),
FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]),
origins={convolution_16}
), InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 4, 4], stride=[1024, 16, 4, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), True, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_16}
)
),
size=(1, 1024, 128, 128),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 64, 16), ModularIndexing(i1, 1, 64), i2, i3],
origins={view_105}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf87', layout=FixedLayout('cuda', torch.float32, size=(16, 64, 128, 128), stride=[1048576, 16384, 128, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf83, i3 + 128 * i2 + 16384 * ModularIndexing(i1, 1, 64) + 1048576 * ModularIndexing(i1 + 64 * i0, 64, 16)) - load(buf85, i1 + 64 * i0) * reciprocal(sqrt(load(buf86, i1 + 64 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf84, i1 + 64 * i0) + load(primals_51, ModularIndexing(i1, 1, 64))),
ranges=(16, 64, 128, 128),
origins={relu_10}
))
),
size=(1, 1024, 128, 128),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 64, 16), ModularIndexing(i1, 1, 64), i2, i3],
origins={view_109}
)
), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(exp(load(buf88, i3 + 128 * i2 + 16384 * i1 + 49152 * i0) * constant(-2.0, torch.float32)) + constant(1.0, torch.float32)) * constant(2.0, torch.float32) - constant(1.0, torch.float32),
ranges=torch.Size([16, 3, 128, 128]),
origins={sub_17}
)
)), s0, 128, 128, 64, 64, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 64, 64, 128, 128]
While executing return [sub_17, primals_1, primals_4, primals_7, primals_10, primals_13, primals_16, primals_19, primals_22, primals_25, primals_28, primals_31, primals_34, primals_37, primals_40, primals_43, primals_46, primals_49, primals_52, cat, repeat_1, repeat_3, repeat_4, view_1, view_5, repeat_5, repeat_7, repeat_8, view_9, view_13, repeat_9, repeat_11, repeat_12, view_17, view_21, repeat_13, repeat_15, repeat_16, view_25, view_29, repeat_17, repeat_19, repeat_20, view_33, add_10, repeat_21, repeat_23, repeat_24, view_37, view_41, repeat_25, repeat_27, repeat_28, view_45, add_15, repeat_29, repeat_31, repeat_32, view_49, view_53, repeat_33, repeat_35, repeat_36, view_57, add_20, repeat_37, repeat_39, repeat_40, view_61, view_65, repeat_41, repeat_43, repeat_44, view_69, add_25, repeat_45, repeat_47, repeat_48, view_73, view_77, repeat_49, repeat_51, repeat_52, view_81, add_30, repeat_53, repeat_55, repeat_56, view_85, view_89, repeat_57, repeat_59, repeat_60, view_93, add_35, repeat_61, repeat_63, repeat_64, view_97, view_101, repeat_65, repeat_67, repeat_68, view_105, view_109, sub_17, sym_size_4, sym_size_5, sym_size_6, sym_size_8, sym_size_9, sym_size_11, sym_size_12, sym_size_14, sym_size_15, sym_size_17, sym_size_18, sym_size_19, sym_size_20, sym_size_22, sym_size_23, sym_size_24, sym_size_25, sym_size_27, sym_size_28, sym_size_29, sym_size_30, sym_size_32, sym_size_33, sym_size_34, sym_size_35, sym_size_37, sym_size_38, sym_size_39, sym_size_40, sym_size_42, sym_size_43, sym_size_44, sym_size_45, sym_size_47, sym_size_48]
Original traceback:
None
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/pytorch_stargan/model.py", line 55, in forward
def forward(self, x, c):
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 516, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 351, in fw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 267, in output
assert all(
AssertionError: [TensorBox(StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(exp(load(buf88, i3 + 128 * i2 + 16384 * i1 + 49152 * i0) * constant(-2.0, torch.float32)) + constant(1.0, torch.float32)) * constant(2.0, torch.float32) - constant(1.0, torch.float32),
ranges=torch.Size([16, 3, 128, 128]),
origins={sub_17}
)
)), TensorBox(StorageBox(
InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[64, 8, 7, 7], stride=[392, 49, 7, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_4', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 4, 4], stride=[1024, 16, 4, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 4, 4], stride=[2048, 16, 4, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_10', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_16', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_22', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_28', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_34', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_40', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_46', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 4, 4], stride=[2048, 16, 4, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 4, 4], stride=[1024, 16, 4, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_52', layout=FixedLayout('cuda', torch.float32, size=[3, 64, 7, 7], stride=[3136, 49, 7, 1]))
)), TensorBox(StorageBox(
ConcatKernel(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[s0, 8, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), inputs=[ComputedBuffer(name='buf0', layout=AliasedLayout('cuda', torch.float32, size=[s0, s1, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_104, i3 + i1 * s2**2 + i2 * s2 + i0 * s1 * s2**2),
ranges=[s0, s1, s2, s2],
origins={cat}
)), ComputedBuffer(name='buf1', layout=AliasedLayout('cuda', torch.float32, size=[s0, 8 - s1, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_105, i1 + i0 * 8 + -1 * s1),
ranges=[s0, 8 - s1, s2, s2],
origins={cat}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf4', layout=FixedLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_2, ModularIndexing(i0, 1, 64)),
ranges=[64*s0],
origins={repeat_1}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf5', layout=FixedLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_53, ModularIndexing(i0, 1, 64)),
ranges=[64*s0],
origins={repeat_3}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf6', layout=FlexibleLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_54, ModularIndexing(i0, 1, 64)),
ranges=[64*s0],
origins={repeat_4}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf3,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 64, 128, 128]), stride=[1048576, 16384, 128, 1]),
inputs=[ConcatKernel(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[s0, 8, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), inputs=[ComputedBuffer(name='buf0', layout=AliasedLayout('cuda', torch.float32, size=[s0, s1, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_104, i3 + i1 * s2**2 + i2 * s2 + i0 * s1 * s2**2),
ranges=[s0, s1, s2, s2],
origins={cat}
)), ComputedBuffer(name='buf1', layout=AliasedLayout('cuda', torch.float32, size=[s0, 8 - s1, s2, s2], stride=[8*s2**2, s2**2, s2, 1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_105, i1 + i0 * 8 + -1 * s1),
ranges=[s0, 8 - s1, s2, s2],
origins={cat}
))]), InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[64, 8, 7, 7], stride=[392, 49, 7, 1]))],
constant_args=(None, (1, 1), (3, 3), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution}
)
),
size=(1, 1024, 128, 128),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 64, 16), ModularIndexing(i1, 1, 64), i2, i3],
origins={view_1}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf7', layout=FixedLayout('cuda', torch.float32, size=(16, 64, 128, 128), stride=[1048576, 16384, 128, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf3, i3 + 128 * i2 + 16384 * ModularIndexing(i1, 1, 64) + 1048576 * ModularIndexing(i1 + 64 * i0, 64, 16)) - load(buf5, i1 + 64 * i0) * reciprocal(sqrt(load(buf6, i1 + 64 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf4, i1 + 64 * i0) + load(primals_3, ModularIndexing(i1, 1, 64))),
ranges=(16, 64, 128, 128),
origins={relu}
))
),
size=(1, 1024, 128, 128),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 64, 16), ModularIndexing(i1, 1, 64), i2, i3],
origins={view_5}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf9', layout=FixedLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_5, ModularIndexing(i0, 1, 128)),
ranges=[128*s0],
origins={repeat_5}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf10', layout=FixedLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_56, ModularIndexing(i0, 1, 128)),
ranges=[128*s0],
origins={repeat_7}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf11', layout=FlexibleLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_57, ModularIndexing(i0, 1, 128)),
ranges=[128*s0],
origins={repeat_8}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf8,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 128, 64, 64]), stride=[524288, 4096, 64, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf7', layout=FixedLayout('cuda', torch.float32, size=(16, 64, 128, 128), stride=[1048576, 16384, 128, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf3, i3 + 128 * i2 + 16384 * ModularIndexing(i1, 1, 64) + 1048576 * ModularIndexing(i1 + 64 * i0, 64, 16)) - load(buf5, i1 + 64 * i0) * reciprocal(sqrt(load(buf6, i1 + 64 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf4, i1 + 64 * i0) + load(primals_3, ModularIndexing(i1, 1, 64))),
ranges=(16, 64, 128, 128),
origins={relu}
))
),
size=(1, 1024, 128, 128),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 64, 16), ModularIndexing(i1, 1, 64), i2, i3],
origins={view_5}
),
FixedLayout('cuda', torch.float32, size=(16, 64, 128, 128), stride=[1048576, 16384, 128, 1]),
origins={convolution_1}
), InputBuffer(name='primals_4', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 4, 4], stride=[1024, 16, 4, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_1}
)
),
size=(1, 2048, 64, 64),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 128, 16), ModularIndexing(i1, 1, 128), i2, i3],
origins={view_9}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf12', layout=FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf8, i3 + 64 * i2 + 4096 * ModularIndexing(i1, 1, 128) + 524288 * ModularIndexing(i1 + 128 * i0, 128, 16)) - load(buf10, i1 + 128 * i0) * reciprocal(sqrt(load(buf11, i1 + 128 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf9, i1 + 128 * i0) + load(primals_6, ModularIndexing(i1, 1, 128))),
ranges=(16, 128, 64, 64),
origins={relu_1}
))
),
size=(1, 2048, 64, 64),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 128, 16), ModularIndexing(i1, 1, 128), i2, i3],
origins={view_13}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf14', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_8, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_9}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf15', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_59, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_11}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf16', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_60, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_12}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf13,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf12', layout=FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf8, i3 + 64 * i2 + 4096 * ModularIndexing(i1, 1, 128) + 524288 * ModularIndexing(i1 + 128 * i0, 128, 16)) - load(buf10, i1 + 128 * i0) * reciprocal(sqrt(load(buf11, i1 + 128 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf9, i1 + 128 * i0) + load(primals_6, ModularIndexing(i1, 1, 128))),
ranges=(16, 128, 64, 64),
origins={relu_1}
))
),
size=(1, 2048, 64, 64),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 128, 16), ModularIndexing(i1, 1, 128), i2, i3],
origins={view_13}
),
FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]),
origins={convolution_2}
), InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 4, 4], stride=[2048, 16, 4, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_2}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_17}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf17', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf13, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf15, i1 + 256 * i0) * reciprocal(sqrt(load(buf16, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf14, i1 + 256 * i0) + load(primals_9, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_2}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_21}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf19', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_11, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_13}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf20', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_62, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_15}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf21', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_63, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_16}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf18,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf17', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf13, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf15, i1 + 256 * i0) * reciprocal(sqrt(load(buf16, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf14, i1 + 256 * i0) + load(primals_9, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_2}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_21}
),
FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]),
origins={convolution_3}
), InputBuffer(name='primals_10', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_3}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_25}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf22', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf18, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf20, i1 + 256 * i0) * reciprocal(sqrt(load(buf21, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf19, i1 + 256 * i0) + load(primals_12, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_3}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_29}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf24', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_14, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_17}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf25', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_65, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_19}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf26', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_66, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_20}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf23,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf22', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf18, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf20, i1 + 256 * i0) * reciprocal(sqrt(load(buf21, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf19, i1 + 256 * i0) + load(primals_12, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_3}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_29}
),
FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]),
origins={convolution_4}
), InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_4}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_33}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf27', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf17, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) + load(buf23, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf25, i1 + 256 * i0) * reciprocal(sqrt(load(buf26, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf24, i1 + 256 * i0) + load(primals_15, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_10}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf29', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_17, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_21}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf30', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_68, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_23}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf31', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_69, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_24}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf28,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ComputedBuffer(name='buf27', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf17, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) + load(buf23, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf25, i1 + 256 * i0) * reciprocal(sqrt(load(buf26, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf24, i1 + 256 * i0) + load(primals_15, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_10}
)), InputBuffer(name='primals_16', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_5}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_37}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf32', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf28, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf30, i1 + 256 * i0) * reciprocal(sqrt(load(buf31, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf29, i1 + 256 * i0) + load(primals_18, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_4}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_41}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf34', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_20, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_25}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf35', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_71, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_27}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf36', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_72, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_28}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf33,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf32', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf28, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf30, i1 + 256 * i0) * reciprocal(sqrt(load(buf31, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf29, i1 + 256 * i0) + load(primals_18, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_4}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_41}
),
FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]),
origins={convolution_6}
), InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_6}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_45}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf37', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf27, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf33, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf35, i1 + 256 * i0) * reciprocal(sqrt(load(buf36, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf34, i1 + 256 * i0) + load(primals_21, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_15}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf39', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_23, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_29}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf40', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_74, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_31}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf41', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_75, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_32}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf38,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ComputedBuffer(name='buf37', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf27, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf33, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf35, i1 + 256 * i0) * reciprocal(sqrt(load(buf36, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf34, i1 + 256 * i0) + load(primals_21, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_15}
)), InputBuffer(name='primals_22', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_7}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_49}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf42', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf38, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf40, i1 + 256 * i0) * reciprocal(sqrt(load(buf41, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf39, i1 + 256 * i0) + load(primals_24, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_5}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_53}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf44', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_26, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_33}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf45', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_77, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_35}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf46', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_78, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_36}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf43,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf42', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf38, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf40, i1 + 256 * i0) * reciprocal(sqrt(load(buf41, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf39, i1 + 256 * i0) + load(primals_24, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_5}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_53}
),
FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]),
origins={convolution_8}
), InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_8}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_57}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf47', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf37, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf43, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf45, i1 + 256 * i0) * reciprocal(sqrt(load(buf46, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf44, i1 + 256 * i0) + load(primals_27, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_20}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf49', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_29, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_37}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf50', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_80, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_39}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf51', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_81, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_40}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf48,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ComputedBuffer(name='buf47', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf37, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf43, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf45, i1 + 256 * i0) * reciprocal(sqrt(load(buf46, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf44, i1 + 256 * i0) + load(primals_27, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_20}
)), InputBuffer(name='primals_28', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_9}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_61}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf52', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf48, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf50, i1 + 256 * i0) * reciprocal(sqrt(load(buf51, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf49, i1 + 256 * i0) + load(primals_30, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_6}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_65}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf54', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_32, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_41}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf55', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_83, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_43}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf56', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_84, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_44}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf53,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf52', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf48, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf50, i1 + 256 * i0) * reciprocal(sqrt(load(buf51, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf49, i1 + 256 * i0) + load(primals_30, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_6}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_65}
),
FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]),
origins={convolution_10}
), InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_10}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_69}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf57', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf47, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf53, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf55, i1 + 256 * i0) * reciprocal(sqrt(load(buf56, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf54, i1 + 256 * i0) + load(primals_33, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_25}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf59', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_35, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_45}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf60', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_86, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_47}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf61', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_87, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_48}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf58,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ComputedBuffer(name='buf57', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf47, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf53, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf55, i1 + 256 * i0) * reciprocal(sqrt(load(buf56, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf54, i1 + 256 * i0) + load(primals_33, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_25}
)), InputBuffer(name='primals_34', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_11}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_73}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf62', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf58, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf60, i1 + 256 * i0) * reciprocal(sqrt(load(buf61, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf59, i1 + 256 * i0) + load(primals_36, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_7}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_77}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf64', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_38, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_49}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf65', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_89, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_51}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf66', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_90, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_52}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf63,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf62', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf58, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf60, i1 + 256 * i0) * reciprocal(sqrt(load(buf61, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf59, i1 + 256 * i0) + load(primals_36, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_7}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_77}
),
FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]),
origins={convolution_12}
), InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_12}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_81}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf67', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf57, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf63, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf65, i1 + 256 * i0) * reciprocal(sqrt(load(buf66, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf64, i1 + 256 * i0) + load(primals_39, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_30}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf69', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_41, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_53}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf70', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_92, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_55}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf71', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_93, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_56}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf68,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ComputedBuffer(name='buf67', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf57, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf63, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf65, i1 + 256 * i0) * reciprocal(sqrt(load(buf66, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf64, i1 + 256 * i0) + load(primals_39, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_30}
)), InputBuffer(name='primals_40', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_13}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_85}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf72', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf68, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf70, i1 + 256 * i0) * reciprocal(sqrt(load(buf71, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf69, i1 + 256 * i0) + load(primals_42, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_8}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_89}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf74', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_44, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_57}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf75', layout=FixedLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_95, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_59}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf76', layout=FlexibleLayout('cuda', torch.float32, size=[256*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_96, ModularIndexing(i0, 1, 256)),
ranges=[256*s0],
origins={repeat_60}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf73,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 256, 32, 32]), stride=[262144, 1024, 32, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf72', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf68, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf70, i1 + 256 * i0) * reciprocal(sqrt(load(buf71, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf69, i1 + 256 * i0) + load(primals_42, ModularIndexing(i1, 1, 256))),
ranges=(16, 256, 32, 32),
origins={relu_8}
))
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_89}
),
FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]),
origins={convolution_14}
), InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[256, 256, 3, 3], stride=[2304, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_14}
)
),
size=(1, 4096, 32, 32),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 256, 16), ModularIndexing(i1, 1, 256), i2, i3],
origins={view_93}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf77', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf67, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf73, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf75, i1 + 256 * i0) * reciprocal(sqrt(load(buf76, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf74, i1 + 256 * i0) + load(primals_45, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_35}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf79', layout=FixedLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_47, ModularIndexing(i0, 1, 128)),
ranges=[128*s0],
origins={repeat_61}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf80', layout=FixedLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_98, ModularIndexing(i0, 1, 128)),
ranges=[128*s0],
origins={repeat_63}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf81', layout=FlexibleLayout('cuda', torch.float32, size=[128*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_99, ModularIndexing(i0, 1, 128)),
ranges=[128*s0],
origins={repeat_64}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf78,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 128, 64, 64]), stride=[524288, 4096, 64, 1]),
inputs=[ComputedBuffer(name='buf77', layout=FixedLayout('cuda', torch.float32, size=(16, 256, 32, 32), stride=[262144, 1024, 32, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf67, i3 + 32 * i2 + 1024 * i1 + 262144 * i0) + load(buf73, i3 + 32 * i2 + 1024 * ModularIndexing(i1, 1, 256) + 262144 * ModularIndexing(i1 + 256 * i0, 256, 16)) - load(buf75, i1 + 256 * i0) * reciprocal(sqrt(load(buf76, i1 + 256 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf74, i1 + 256 * i0) + load(primals_45, ModularIndexing(i1, 1, 256)),
ranges=(16, 256, 32, 32),
origins={add_35}
)), InputBuffer(name='primals_46', layout=FixedLayout('cuda', torch.float32, size=[256, 128, 4, 4], stride=[2048, 16, 4, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), True, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_15}
)
),
size=(1, 2048, 64, 64),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 128, 16), ModularIndexing(i1, 1, 128), i2, i3],
origins={view_97}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf82', layout=FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf78, i3 + 64 * i2 + 4096 * ModularIndexing(i1, 1, 128) + 524288 * ModularIndexing(i1 + 128 * i0, 128, 16)) - load(buf80, i1 + 128 * i0) * reciprocal(sqrt(load(buf81, i1 + 128 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf79, i1 + 128 * i0) + load(primals_48, ModularIndexing(i1, 1, 128))),
ranges=(16, 128, 64, 64),
origins={relu_9}
))
),
size=(1, 2048, 64, 64),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 128, 16), ModularIndexing(i1, 1, 128), i2, i3],
origins={view_101}
)
), TensorBox(StorageBox(
ComputedBuffer(name='buf84', layout=FixedLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_50, ModularIndexing(i0, 1, 64)),
ranges=[64*s0],
origins={repeat_65}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf85', layout=FixedLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_101, ModularIndexing(i0, 1, 64)),
ranges=[64*s0],
origins={repeat_67}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf86', layout=FlexibleLayout('cuda', torch.float32, size=[64*s0], stride=[1]), data=Pointwise(
'cuda',
torch.float32,
load(primals_102, ModularIndexing(i0, 1, 64)),
ranges=[64*s0],
origins={repeat_68}
))
)), TensorBox(
View(
StorageBox(
Convolution(
name=buf83,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([16, 64, 128, 128]), stride=[1048576, 16384, 128, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf82', layout=FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf78, i3 + 64 * i2 + 4096 * ModularIndexing(i1, 1, 128) + 524288 * ModularIndexing(i1 + 128 * i0, 128, 16)) - load(buf80, i1 + 128 * i0) * reciprocal(sqrt(load(buf81, i1 + 128 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf79, i1 + 128 * i0) + load(primals_48, ModularIndexing(i1, 1, 128))),
ranges=(16, 128, 64, 64),
origins={relu_9}
))
),
size=(1, 2048, 64, 64),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 128, 16), ModularIndexing(i1, 1, 128), i2, i3],
origins={view_101}
),
FixedLayout('cuda', torch.float32, size=(16, 128, 64, 64), stride=[524288, 4096, 64, 1]),
origins={convolution_16}
), InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[128, 64, 4, 4], stride=[1024, 16, 4, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), True, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_16}
)
),
size=(1, 1024, 128, 128),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 64, 16), ModularIndexing(i1, 1, 64), i2, i3],
origins={view_105}
)
), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf87', layout=FixedLayout('cuda', torch.float32, size=(16, 64, 128, 128), stride=[1048576, 16384, 128, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf83, i3 + 128 * i2 + 16384 * ModularIndexing(i1, 1, 64) + 1048576 * ModularIndexing(i1 + 64 * i0, 64, 16)) - load(buf85, i1 + 64 * i0) * reciprocal(sqrt(load(buf86, i1 + 64 * i0) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(buf84, i1 + 64 * i0) + load(primals_51, ModularIndexing(i1, 1, 64))),
ranges=(16, 64, 128, 128),
origins={relu_10}
))
),
size=(1, 1024, 128, 128),
reindex=lambda i0, i1, i2, i3: [ModularIndexing(i1, 64, 16), ModularIndexing(i1, 1, 64), i2, i3],
origins={view_109}
)
), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.float32,
reciprocal(exp(load(buf88, i3 + 128 * i2 + 16384 * i1 + 49152 * i0) * constant(-2.0, torch.float32)) + constant(1.0, torch.float32)) * constant(2.0, torch.float32) - constant(1.0, torch.float32),
ranges=torch.Size([16, 3, 128, 128]),
origins={sub_17}
)
)), s0, 128, 128, 64, 64, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 64, 64, 128, 128]
While executing return [sub_17, primals_1, primals_4, primals_7, primals_10, primals_13, primals_16, primals_19, primals_22, primals_25, primals_28, primals_31, primals_34, primals_37, primals_40, primals_43, primals_46, primals_49, primals_52, cat, repeat_1, repeat_3, repeat_4, view_1, view_5, repeat_5, repeat_7, repeat_8, view_9, view_13, repeat_9, repeat_11, repeat_12, view_17, view_21, repeat_13, repeat_15, repeat_16, view_25, view_29, repeat_17, repeat_19, repeat_20, view_33, add_10, repeat_21, repeat_23, repeat_24, view_37, view_41, repeat_25, repeat_27, repeat_28, view_45, add_15, repeat_29, repeat_31, repeat_32, view_49, view_53, repeat_33, repeat_35, repeat_36, view_57, add_20, repeat_37, repeat_39, repeat_40, view_61, view_65, repeat_41, repeat_43, repeat_44, view_69, add_25, repeat_45, repeat_47, repeat_48, view_73, view_77, repeat_49, repeat_51, repeat_52, view_81, add_30, repeat_53, repeat_55, repeat_56, view_85, view_89, repeat_57, repeat_59, repeat_60, view_93, add_35, repeat_61, repeat_63, repeat_64, view_97, view_101, repeat_65, repeat_67, repeat_68, view_105, view_109, sub_17, sym_size_4, sym_size_5, sym_size_6, sym_size_8, sym_size_9, sym_size_11, sym_size_12, sym_size_14, sym_size_15, sym_size_17, sym_size_18, sym_size_19, sym_size_20, sym_size_22, sym_size_23, sym_size_24, sym_size_25, sym_size_27, sym_size_28, sym_size_29, sym_size_30, sym_size_32, sym_size_33, sym_size_34, sym_size_35, sym_size_37, sym_size_38, sym_size_39, sym_size_40, sym_size_42, sym_size_43, sym_size_44, sym_size_45, sym_size_47, sym_size_48]
Original traceback:
None
TorchDynamo optimized model failed to run because of following error
cuda train pytorch_stargan FAIL
Running torchbench.py pytorch_struct...
ERROR:common:'int' object has no attribute 'size'
While executing %sym_size : [#users=2] = placeholder[target=sym_size]
Original traceback:
Module stack: {}
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/pytorch_struct/networks/NeuralCFG.py", line 49, in terms
torch.einsum("vh,th->tv", self.word_emb, self.mlp1(self.term_emb))
| File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/pytorch_struct/networks/NeuralCFG.py", line 77, in forward
return terms(input), rules(batch), roots(batch)
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 337, in <graph break in forward_and_backward_pass>
self.grad_scaler.scale(loss).backward()
File "/scratch/ezyang/work/pytorch/torch/_tensor.py", line 450, in backward
torch.autograd.backward(
File "/scratch/ezyang/work/pytorch/torch/autograd/__init__.py", line 197, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
File "/scratch/ezyang/work/pytorch/torch/autograd/function.py", line 270, in apply
return user_fn(self, *args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 558, in backward
CompiledFunction.compiled_bw = aot_config.bw_compiler(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/optimizations/backends.py", line 555, in _wrapped_bw_compiler
return disable(disable(bw_compiler)(*args, **kwargs))
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 362, in bw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 199, in placeholder
sizes, strides = self.static_sizes_strides(example)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 53, in static_sizes_strides
size = [sympy.Integer(i) for i in ex.size()]
AttributeError: 'int' object has no attribute 'size'
While executing %sym_size : [#users=2] = placeholder[target=sym_size]
Original traceback:
Module stack: {}
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/pytorch_struct/networks/NeuralCFG.py", line 49, in terms
torch.einsum("vh,th->tv", self.word_emb, self.mlp1(self.term_emb))
| File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/pytorch_struct/networks/NeuralCFG.py", line 77, in forward
return terms(input), rules(batch), roots(batch)
TorchDynamo optimized model failed to run because of following error
cuda train pytorch_struct FAIL
Running torchbench.py pytorch_unet...
ERROR:common:Failed running call_function <built-in function pad>(*(FakeTensor(FakeTensor(..., device='meta',
size=(s0, s7, -4.0*s1 + 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s2 + 4)//2 + 5)//2 + 5)//2 + 5)//2 + 14.0, -4.0*s1 + 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s3 + 4)//2 + 5)//2 + 5)//2 + 5)//2 + 14.0),
grad_fn=<UpsampleBilinear2DBackward1>), cuda:0), [(2.0*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s3 + 4)//2 + 5)//2 + 5)//2 - 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s3 + 4)//2 + 5)//2 + 5)//2 + 5)//2 - 7.0)//2, 2.0*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s3 + 4)//2 + 5)//2 + 5)//2 - 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s3 + 4)//2 + 5)//2 + 5)//2 + 5)//2 - (2.0*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s3 + 4)//2 + 5)//2 + 5)//2 - 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s3 + 4)//2 + 5)//2 + 5)//2 + 5)//2 - 7.0)//2 - 7.0, (2.0*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s2 + 4)//2 + 5)//2 + 5)//2 - 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s2 + 4)//2 + 5)//2 + 5)//2 + 5)//2 - 7.0)//2, 2.0*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s2 + 4)//2 + 5)//2 + 5)//2 - 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s2 + 4)//2 + 5)//2 + 5)//2 + 5)//2 - (2.0*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s2 + 4)//2 + 5)//2 + 5)//2 - 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s2 + 4)//2 + 5)//2 + 5)//2 + 5)//2 - 7.0)//2 - 7.0]), **{}):
cannot determine truth value of Relational
(scroll up for backtrace)
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py", line 52, in _run_node
return node.target(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_subclasses/fake_tensor.py", line 849, in __torch_dispatch__
return decomposition_table[func](*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_refs/__init__.py", line 2561, in constant_pad_nd
if pad[pad_idx] < 0:
File "/scratch/ezyang/work/pytorch/torch/__init__.py", line 214, in __bool__
return self.node.bool_()
File "/scratch/ezyang/work/pytorch/torch/fx/experimental/symbolic_shapes.py", line 203, in bool_
return bool(self.shape_env.evaluate_expr(self.shape_env.replace(self.expr)))
File "/scratch/ezyang/work/env/lib/python3.9/site-packages/sympy/core/relational.py", line 511, in __bool__
raise TypeError("cannot determine truth value of Relational")
TypeError: cannot determine truth value of Relational
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 251, in catch_errors
return callback(frame, cache_size)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py", line 459, in _convert_frame
result = inner_convert(frame, cache_size)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py", line 112, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py", line 341, in _convert_frame_assert
return _compile(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py", line 396, in _compile
out_code = transform_code_object(code, transform)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
transformations(instructions, code_options)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/convert_frame.py", line 384, in transform
tracer.run()
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 1494, in run
super().run()
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 359, in run
and self.step()
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 329, in step
getattr(self, inst.opname)(inst)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 181, in wrapper
return inner_fn(self, inst)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 786, in CALL_FUNCTION
self.call_function(fn, args, {})
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 271, in call_function
self.push(fn.call_function(self, args, kwargs))
File "/scratch/ezyang/work/pytorch/torch/_dynamo/variables/nn_module.py", line 221, in call_function
return tx.inline_user_function_return(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 300, in inline_user_function_return
result = InliningInstructionTranslator.inline_call(self, fn, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 1566, in inline_call
return cls.inline_call_(parent, func, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 1620, in inline_call_
tracer.run()
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 359, in run
and self.step()
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 329, in step
getattr(self, inst.opname)(inst)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 181, in wrapper
return inner_fn(self, inst)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 786, in CALL_FUNCTION
self.call_function(fn, args, {})
File "/scratch/ezyang/work/pytorch/torch/_dynamo/symbolic_convert.py", line 271, in call_function
self.push(fn.call_function(self, args, kwargs))
File "/scratch/ezyang/work/pytorch/torch/_dynamo/variables/torch.py", line 408, in call_function
tensor_variable = TensorVariable.create(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py", line 199, in create
example_value = _get_fake_value(proxy.node, tx)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py", line 130, in _get_fake_value
return wrap_fake_exception(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 709, in wrap_fake_exception
return fn()
File "/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py", line 131, in <lambda>
lambda: _run_node(tx.output, node, args, kwargs, nnmodule)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/variables/tensor.py", line 61, in _run_node
raise RuntimeError(
RuntimeError: Failed running call_function <built-in function pad>(*(FakeTensor(FakeTensor(..., device='meta',
size=(s0, s7, -4.0*s1 + 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s2 + 4)//2 + 5)//2 + 5)//2 + 5)//2 + 14.0, -4.0*s1 + 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s3 + 4)//2 + 5)//2 + 5)//2 + 5)//2 + 14.0),
grad_fn=<UpsampleBilinear2DBackward1>), cuda:0), [(2.0*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s3 + 4)//2 + 5)//2 + 5)//2 - 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s3 + 4)//2 + 5)//2 + 5)//2 + 5)//2 - 7.0)//2, 2.0*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s3 + 4)//2 + 5)//2 + 5)//2 - 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s3 + 4)//2 + 5)//2 + 5)//2 + 5)//2 - (2.0*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s3 + 4)//2 + 5)//2 + 5)//2 - 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s3 + 4)//2 + 5)//2 + 5)//2 + 5)//2 - 7.0)//2 - 7.0, (2.0*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s2 + 4)//2 + 5)//2 + 5)//2 - 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s2 + 4)//2 + 5)//2 + 5)//2 + 5)//2 - 7.0)//2, 2.0*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s2 + 4)//2 + 5)//2 + 5)//2 - 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s2 + 4)//2 + 5)//2 + 5)//2 + 5)//2 - (2.0*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s2 + 4)//2 + 5)//2 + 5)//2 - 2.0*(-2*s1 + (-2*s1 + (-2*s1 + (-2*s1 + s2 + 4)//2 + 5)//2 + 5)//2 + 5)//2 - 7.0)//2 - 7.0]), **{}):
cannot determine truth value of Relational
(scroll up for backtrace)
TorchDynamo optimized model failed to run because of following error
cuda train pytorch_unet FAIL
Running torchbench.py resnet18...
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchvision/torchvision/models/resnet.py", line 284, in forward
def forward(self, x: Tensor) -> Tensor:
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 945, in new_func
return compiled_fn(args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 321, in g
return f(*args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 573, in compiled_function
return CompiledFunction.apply(*remove_dupe_args(args))
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 528, in forward
fw_outs = call_func_with_args(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 346, in call_func_with_args
out = normalize_as_list(f(args))
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 185, in run
return model(new_inputs)
File "/tmp/torchinductor_ezyang/ys/cys3jrob4wbdly6mo5wv2bpqu36lalfg3tcid6gae26iq2dogaqe.py", line 791, in call
return (buf44, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_63, primals_64, primals_66, primals_67, primals_69, primals_70, primals_72, primals_73, primals_75, primals_76, primals_78, primals_79, primals_81, primals_82, primals_84, primals_85, primals_87, primals_88, primals_90, primals_91, primals_93, primals_94, primals_96, primals_97, primals_99, primals_100, primals_102, primals_103, primals_105, primals_106, primals_108, primals_109, primals_111, primals_112, primals_114, primals_115, primals_117, primals_118, primals_120, primals_121, primals_123, buf0, buf1, buf2, buf3, buf4, buf5, buf6, buf7, buf8, buf9, buf10, buf11, buf12, buf13, buf14, buf15, buf17, buf18, buf19, buf20, buf21, buf22, buf23, buf24, buf25, buf27, buf28, buf29, buf30, buf31, buf32, buf33, buf34, buf35, buf37, buf38, buf39, buf40, as_strided(buf43, (2, 512), (512, 1)), as_strided(primals_61, (1000, 512), (512, 1)), buf45, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
cuda train resnet18 FAIL
Running torchbench.py resnet50...
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchvision/torchvision/models/resnet.py", line 284, in forward
def forward(self, x: Tensor) -> Tensor:
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 945, in new_func
return compiled_fn(args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 321, in g
return f(*args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 573, in compiled_function
return CompiledFunction.apply(*remove_dupe_args(args))
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 528, in forward
fw_outs = call_func_with_args(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 346, in call_func_with_args
out = normalize_as_list(f(args))
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 185, in run
return model(new_inputs)
File "/tmp/torchinductor_ezyang/bx/cbxzj3vsscgkrprjh6nf3ai7tocnnolptw6k4qba4hqgiqww7lwc.py", line 1152, in call
return (buf110, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_157, primals_158, primals_162, primals_163, primals_165, primals_166, primals_168, primals_169, primals_171, primals_172, primals_174, primals_175, primals_177, primals_178, primals_180, primals_181, primals_183, primals_184, primals_186, primals_187, primals_189, primals_190, primals_192, primals_193, primals_195, primals_196, primals_198, primals_199, primals_201, primals_202, primals_204, primals_205, primals_207, primals_208, primals_210, primals_211, primals_213, primals_214, primals_216, primals_217, primals_219, primals_220, primals_222, primals_223, primals_225, primals_226, primals_228, primals_229, primals_231, primals_232, primals_234, primals_235, primals_237, primals_238, primals_240, primals_241, primals_243, primals_244, primals_246, primals_247, primals_249, primals_250, primals_252, primals_253, primals_255, primals_256, primals_258, primals_259, primals_261, primals_262, primals_264, primals_265, primals_267, primals_268, primals_270, primals_271, primals_273, primals_274, primals_276, primals_277, primals_279, primals_280, primals_282, primals_283, primals_285, primals_286, primals_288, primals_289, primals_291, primals_292, primals_294, primals_295, primals_297, primals_298, primals_300, primals_301, primals_303, primals_304, primals_306, primals_307, primals_309, primals_310, primals_312, primals_313, primals_315, primals_316, primals_318, primals_319, primals_321, buf0, buf1, buf2, buf3, buf4, buf5, buf6, buf7, buf8, buf9, buf11, buf12, buf13, buf14, buf15, buf16, buf17, buf18, buf19, buf20, buf21, buf22, buf23, buf24, buf25, buf26, buf27, buf28, buf29, buf31, buf32, buf33, buf34, buf35, buf36, buf37, buf38, buf39, buf40, buf41, buf42, buf43, buf44, buf45, buf46, buf47, buf48, buf49, buf50, buf51, buf52, buf53, buf54, buf55, buf57, buf58, buf59, buf60, buf61, buf62, buf63, buf64, buf65, buf66, buf67, buf68, buf69, buf70, buf71, buf72, buf73, buf74, buf75, buf76, buf77, buf78, buf79, buf80, buf81, buf82, buf83, buf84, buf85, buf86, buf87, buf88, buf89, buf90, buf91, buf92, buf93, buf95, buf96, buf97, buf98, buf99, buf100, buf101, buf102, buf103, buf104, buf105, buf106, as_strided(buf109, (2, 2048), (2048, 1)), as_strided(primals_160, (1000, 2048), (2048, 1)), buf111, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
cuda train resnet50 FAIL
Running torchbench.py resnet50_quantized_qat...
WARNING:common:fp64 golden ref were not generated for resnet50_quantized_qat
[2022-11-06 03:31:17,609] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,620] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,638] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,662] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,671] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,688] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,698] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,713] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,721] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,737] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,744] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,759] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,766] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,774] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,790] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,797] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,813] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,820] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,835] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,842] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,849] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,865] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,872] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,888] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,896] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,911] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,917] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,925] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,942] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,950] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,968] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,976] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:17,993] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,000] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,015] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,021] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,029] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,045] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,052] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,068] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,075] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,090] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,096] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,104] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,119] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,127] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,142] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,150] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,164] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,171] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,178] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,194] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,201] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,217] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,224] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,239] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,245] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,252] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,269] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,278] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,295] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,302] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,318] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,325] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,340] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,346] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,354] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,370] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,377] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,393] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,400] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,415] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,421] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,429] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,445] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,453] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,468] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,475] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,490] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,497] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,504] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,520] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,527] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,543] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,550] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,565] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,572] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,579] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,594] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,602] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,617] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,624] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,640] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,646] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,653] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,669] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,676] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,692] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,699] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,714] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,721] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,728] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,744] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,753] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,769] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,777] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,793] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,800] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,816] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,822] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,830] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,845] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,853] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,869] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,876] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,891] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,897] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,905] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,920] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,928] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,943] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,950] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,965] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,972] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,978] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,983] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,988] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
[2022-11-06 03:31:18,993] torch._inductor.ir: [WARNING] Using FallbackKernel: aten._fused_moving_avg_obs_fq_helper_functional
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/fx/graph_module.py", line 660, in call_wrapped
return self._wrapped_call(self, *args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/fx/graph_module.py", line 279, in __call__
raise e
File "/scratch/ezyang/work/pytorch/torch/fx/graph_module.py", line 269, in __call__
return super(self.cls, obj).__call__(*args, **kwargs) # type: ignore[misc]
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "<eval_with_key>.8", line 4, in forward
def forward(self, x : torch.Tensor) -> torch.Tensor:
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 945, in new_func
return compiled_fn(args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 321, in g
return f(*args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 573, in compiled_function
return CompiledFunction.apply(*remove_dupe_args(args))
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 528, in forward
fw_outs = call_func_with_args(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 346, in call_func_with_args
out = normalize_as_list(f(args))
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 185, in run
return model(new_inputs)
File "/tmp/torchinductor_ezyang/vi/cvilyeiqsp7ivdfcj2cw6in2synczfq2kvep2uohh6hwn5xv5wqy.py", line 3577, in call
return (buf1123, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_157, primals_158, primals_169, primals_170, primals_186, primals_187, primals_203, primals_204, primals_220, primals_221, primals_237, primals_238, primals_261, primals_262, primals_278, primals_279, primals_295, primals_296, primals_319, primals_320, primals_336, primals_337, primals_353, primals_354, primals_377, primals_378, primals_394, primals_395, primals_411, primals_412, primals_428, primals_429, primals_452, primals_453, primals_469, primals_470, primals_486, primals_487, primals_510, primals_511, primals_527, primals_528, primals_544, primals_545, primals_568, primals_569, primals_585, primals_586, primals_602, primals_603, primals_626, primals_627, primals_643, primals_644, primals_660, primals_661, primals_677, primals_678, primals_701, primals_702, primals_718, primals_719, primals_735, primals_736, primals_759, primals_760, primals_776, primals_777, primals_793, primals_794, primals_817, primals_818, primals_834, primals_835, primals_851, primals_852, primals_875, primals_876, primals_892, primals_893, primals_909, primals_910, primals_933, primals_934, primals_950, primals_951, primals_967, primals_968, primals_991, primals_992, primals_1008, primals_1009, primals_1025, primals_1026, primals_1042, primals_1043, primals_1066, primals_1067, primals_1083, primals_1084, primals_1100, primals_1101, primals_1124, primals_1125, primals_1141, primals_1142, primals_1158, primals_1159, buf1, buf9, buf10, buf16, buf19, buf20, buf26, buf28, buf29, buf36, buf37, buf43, buf46, buf47, buf54, buf55, buf61, buf64, buf65, buf72, buf73, buf79, buf83, buf90, buf91, buf97, buf101, buf108, buf109, buf116, buf117, buf123, buf126, buf127, buf134, buf135, buf141, buf144, buf145, buf152, buf153, buf159, buf163, buf170, buf171, buf178, buf179, buf185, buf188, buf189, buf196, buf197, buf203, buf206, buf207, buf214, buf215, buf221, buf225, buf232, buf233, buf240, buf241, buf247, buf250, buf251, buf258, buf259, buf265, buf268, buf269, buf276, buf277, buf283, buf287, buf294, buf295, buf301, buf305, buf312, buf313, buf320, buf321, buf327, buf330, buf331, buf338, buf339, buf345, buf348, buf349, buf356, buf357, buf363, buf367, buf374, buf375, buf382, buf383, buf389, buf392, buf393, buf400, buf401, buf407, buf410, buf411, buf418, buf419, buf425, buf429, buf436, buf437, buf444, buf445, buf451, buf454, buf455, buf462, buf463, buf469, buf472, buf473, buf480, buf481, buf487, buf491, buf498, buf499, buf506, buf507, buf513, buf516, buf517, buf524, buf525, buf531, buf534, buf535, buf542, buf543, buf549, buf553, buf560, buf561, buf567, buf571, buf578, buf579, buf586, buf587, buf593, buf596, buf597, buf604, buf605, buf611, buf614, buf615, buf622, buf623, buf629, buf633, buf640, buf641, buf648, buf649, buf655, buf658, buf659, buf666, buf667, buf673, buf676, buf677, buf684, buf685, buf691, buf695, buf702, buf703, buf710, buf711, buf717, buf720, buf721, buf728, buf729, buf735, buf738, buf739, buf746, buf747, buf753, buf757, buf764, buf765, buf772, buf773, buf779, buf782, buf783, buf790, buf791, buf797, buf800, buf801, buf808, buf809, buf815, buf819, buf826, buf827, buf834, buf835, buf841, buf844, buf845, buf852, buf853, buf859, buf862, buf863, buf870, buf871, buf877, buf881, buf888, buf889, buf896, buf897, buf903, buf906, buf907, buf914, buf915, buf921, buf924, buf925, buf932, buf933, buf939, buf943, buf950, buf951, buf957, buf961, buf968, buf969, buf976, buf977, buf983, buf986, buf987, buf994, buf995, buf1001, buf1004, buf1005, buf1012, buf1013, buf1019, buf1023, buf1030, buf1031, buf1038, buf1039, buf1045, buf1048, buf1049, buf1056, buf1057, buf1063, buf1066, buf1067, buf1074, buf1075, buf1081, buf1085, buf1093, buf1102, buf1108, buf1109, buf1116, buf1124, as_strided(buf1115, (1000, 2048), (2048, 1)), buf1129, buf1130, buf1131, buf1132, buf1133, buf1134, buf1135, buf1136, buf1137, buf1138, buf1139, buf1140, buf1141, buf1142, buf1143, buf1144, buf1145, buf1146, buf1147, buf1148, buf1149, buf1150, buf1151, buf1152, buf1153, buf1154, buf1155, buf1156, buf1157, buf1158, buf1159, buf1160, buf1161, buf1162, buf1163, buf1164, buf1165, buf1166, buf1167, buf1168, buf1169, buf1170, buf1171, buf1172, buf1173, buf1174, buf1175, buf1176, buf1177, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
cuda train resnet50_quantized_qat FAIL
Running torchbench.py resnext50_32x4d...
ERROR:common:name 's0' is not defined
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchvision/torchvision/models/resnet.py", line 284, in forward
def forward(self, x: Tensor) -> Tensor:
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 945, in new_func
return compiled_fn(args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 321, in g
return f(*args)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 573, in compiled_function
return CompiledFunction.apply(*remove_dupe_args(args))
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 528, in forward
fw_outs = call_func_with_args(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 346, in call_func_with_args
out = normalize_as_list(f(args))
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 185, in run
return model(new_inputs)
File "/tmp/torchinductor_ezyang/q4/cq43f7zlzekuwaumjjv6oiy4arsha7x4mevyghtsthjg6lapcehm.py", line 1152, in call
return (buf110, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_157, primals_158, primals_162, primals_163, primals_165, primals_166, primals_168, primals_169, primals_171, primals_172, primals_174, primals_175, primals_177, primals_178, primals_180, primals_181, primals_183, primals_184, primals_186, primals_187, primals_189, primals_190, primals_192, primals_193, primals_195, primals_196, primals_198, primals_199, primals_201, primals_202, primals_204, primals_205, primals_207, primals_208, primals_210, primals_211, primals_213, primals_214, primals_216, primals_217, primals_219, primals_220, primals_222, primals_223, primals_225, primals_226, primals_228, primals_229, primals_231, primals_232, primals_234, primals_235, primals_237, primals_238, primals_240, primals_241, primals_243, primals_244, primals_246, primals_247, primals_249, primals_250, primals_252, primals_253, primals_255, primals_256, primals_258, primals_259, primals_261, primals_262, primals_264, primals_265, primals_267, primals_268, primals_270, primals_271, primals_273, primals_274, primals_276, primals_277, primals_279, primals_280, primals_282, primals_283, primals_285, primals_286, primals_288, primals_289, primals_291, primals_292, primals_294, primals_295, primals_297, primals_298, primals_300, primals_301, primals_303, primals_304, primals_306, primals_307, primals_309, primals_310, primals_312, primals_313, primals_315, primals_316, primals_318, primals_319, primals_321, buf0, buf1, buf2, buf3, buf4, buf5, buf6, buf7, buf8, buf9, buf11, buf12, buf13, buf14, buf15, buf16, buf17, buf18, buf19, buf20, buf21, buf22, buf23, buf24, buf25, buf26, buf27, buf28, buf29, buf31, buf32, buf33, buf34, buf35, buf36, buf37, buf38, buf39, buf40, buf41, buf42, buf43, buf44, buf45, buf46, buf47, buf48, buf49, buf50, buf51, buf52, buf53, buf54, buf55, buf57, buf58, buf59, buf60, buf61, buf62, buf63, buf64, buf65, buf66, buf67, buf68, buf69, buf70, buf71, buf72, buf73, buf74, buf75, buf76, buf77, buf78, buf79, buf80, buf81, buf82, buf83, buf84, buf85, buf86, buf87, buf88, buf89, buf90, buf91, buf92, buf93, buf95, buf96, buf97, buf98, buf99, buf100, buf101, buf102, buf103, buf104, buf105, buf106, as_strided(buf109, (2, 2048), (2048, 1)), as_strided(primals_160, (1000, 2048), (2048, 1)), buf111, s0, )
NameError: name 's0' is not defined
TorchDynamo optimized model failed to run because of following error
cuda train resnext50_32x4d FAIL
Running torchbench.py shufflenet_v2_x1_0...
ERROR:common:[TensorBox(StorageBox(
MatrixMultiplyAdd(
name=buf161,
layout=FlexibleLayout('cuda', torch.float32, size=[2, 1000], stride=[1000, 1]),
inputs=[InputBuffer(name='primals_170', layout=FixedLayout('cuda', torch.float32, size=[1000], stride=[1])), ComputedBuffer(name='buf160', layout=FlexibleLayout('cuda', torch.float32, size=[2, 1024], stride=[1024, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf159, i1 + 1024 * i0) / index_expr(49, torch.float32),
ranges=[2, 1024],
origins={mean}
)), ReinterpretView(
StorageBox(
InputBuffer(name='primals_169', layout=FixedLayout('cuda', torch.float32, size=[1000, 1024], stride=[1024, 1]))
),
FixedLayout('cuda', torch.float32, size=[1024, 1000], stride=[1, 1024]),
origins={permute_16}
)],
constant_args=(),
kwargs={'beta': 1, 'alpha': 1},
output_view=None,
origins={addmm}
)
)), TensorBox(StorageBox(
InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[24, 3, 3, 3], stride=[27, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_2', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_4', layout=FixedLayout('cuda', torch.float32, size=[24, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_5', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[58, 24, 1, 1], stride=[24, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_8', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_10', layout=FixedLayout('cuda', torch.float32, size=[58, 24, 1, 1], stride=[24, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_11', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_14', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_16', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_17', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_20', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_22', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_23', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_26', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_28', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_29', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_32', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_34', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_35', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_38', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_40', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_41', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_44', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_46', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_47', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_50', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_52', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_53', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_55', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_56', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_58', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_59', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_61', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_62', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_64', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_65', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_67', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_68', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_70', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_71', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_73', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_74', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_76', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_77', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_79', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_80', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_82', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_83', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_85', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_86', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_88', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_89', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_91', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_92', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_94', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_95', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_97', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_98', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_100', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_101', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_103', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_104', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_106', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_107', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_109', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_110', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_112', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_113', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_115', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_116', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_118', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_119', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_121', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_122', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_124', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_125', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_127', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_128', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_130', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_131', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_133', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_134', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_136', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_137', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_139', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_140', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_142', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_143', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_145', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_146', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_148', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_149', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_151', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_152', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_154', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_155', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_157', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_158', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_160', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_161', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_163', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_164', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_166', layout=FixedLayout('cuda', torch.float32, size=[1024, 464, 1, 1], stride=[464, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_167', layout=FixedLayout('cuda', torch.float32, size=[1024], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_171', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_172', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_174', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_175', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_177', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_178', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_180', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_181', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_183', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_184', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_186', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_187', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_189', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_190', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_192', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_193', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_195', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_196', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_198', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_199', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_201', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_202', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_204', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_205', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_207', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_208', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_210', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_211', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_213', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_214', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_216', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_217', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_219', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_220', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_222', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_223', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_225', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_226', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_228', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_229', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_231', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_232', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_234', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_235', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_237', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_238', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_240', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_241', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_243', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_244', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_246', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_247', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_249', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_250', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_252', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_253', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_255', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_256', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_258', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_259', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_261', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_262', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_264', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_265', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_267', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_268', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_270', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_271', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_273', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_274', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_276', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_277', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_279', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_280', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_282', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_283', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_285', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_286', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_288', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_289', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_291', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_292', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_294', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_295', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_297', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_298', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_300', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_301', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_303', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_304', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_306', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_307', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_309', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_310', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_312', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_313', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_315', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_316', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_318', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_319', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_321', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_322', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_324', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_325', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_327', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_328', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_330', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_331', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_333', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_334', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_336', layout=FixedLayout('cuda', torch.float32, size=[1024], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_337', layout=FixedLayout('cuda', torch.float32, size=[1024], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_339', layout=FixedLayout('cuda', torch.float32, size=[s0, 3, s2, s2], stride=[3*s2**2, s2**2, s2, 1]))
)), TensorBox(StorageBox(
Convolution(
name=buf0,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 24, 112, 112]), stride=[301056, 12544, 112, 1]),
inputs=[InputBuffer(name='primals_339', layout=FixedLayout('cuda', torch.float32, size=[s0, 3, s2, s2], stride=[3*s2**2, s2**2, s2, 1])), InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[24, 3, 3, 3], stride=[27, 9, 3, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf1', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 24, 112, 112]), stride=[301056, 12544, 112, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf0, i3 + 112 * i2 + 12544 * i1 + 301056 * i0) - load(primals_171, i1) * reciprocal(sqrt(load(primals_172, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_2, i1) + load(primals_3, i1)),
ranges=torch.Size([2, 24, 112, 112]),
origins={relu}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[2, 24, 56, 56], stride=[75264, 3136, 56, 1]), data=Pointwise(
'cuda',
torch.float32,
maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))))))))),
ranges=[2, 24, 56, 56],
origins={max_pool2d_with_indices}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf3', layout=FlexibleLayout('cuda', torch.int64, size=[2, 24, 56, 56], stride=[75264, 3136, 56, 1]), data=Pointwise(
'cuda',
torch.int64,
where(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf)))))))), index_expr(113 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))))))), index_expr(112 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf)))))), index_expr(111 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))))), index_expr(1 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf)))), index_expr(2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))), index_expr(-1 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf)), index_expr(-111 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), index_expr(-112 + 2 * i3 + 224 * i2, torch.int64), index_expr(-113 + 2 * i3 + 224 * i2, torch.int64))))))))),
ranges=[2, 24, 56, 56],
origins={max_pool2d_with_indices}
))
)), TensorBox(StorageBox(
Convolution(
name=buf4,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 24, 28, 28]), stride=[18816, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[2, 24, 56, 56], stride=[75264, 3136, 56, 1]), data=Pointwise(
'cuda',
torch.float32,
maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))))))))),
ranges=[2, 24, 56, 56],
origins={max_pool2d_with_indices}
)), InputBuffer(name='primals_4', layout=FixedLayout('cuda', torch.float32, size=[24, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 24),
kwargs={},
output_view=None,
origins={convolution_1}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf5', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 24, 28, 28]), stride=[18816, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf4, i3 + 28 * i2 + 784 * i1 + 18816 * i0) - load(primals_174, i1) * reciprocal(sqrt(load(primals_175, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_5, i1) + load(primals_6, i1),
ranges=torch.Size([2, 24, 28, 28]),
origins={add_3}
))
)), TensorBox(StorageBox(
Convolution(
name=buf6,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf5', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 24, 28, 28]), stride=[18816, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf4, i3 + 28 * i2 + 784 * i1 + 18816 * i0) - load(primals_174, i1) * reciprocal(sqrt(load(primals_175, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_5, i1) + load(primals_6, i1),
ranges=torch.Size([2, 24, 28, 28]),
origins={add_3}
)), InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[58, 24, 1, 1], stride=[24, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_2}
)
)), TensorBox(StorageBox(
Convolution(
name=buf8,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 56, 56]), stride=[181888, 3136, 56, 1]),
inputs=[ComputedBuffer(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[2, 24, 56, 56], stride=[75264, 3136, 56, 1]), data=Pointwise(
'cuda',
torch.float32,
maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))))))))),
ranges=[2, 24, 56, 56],
origins={max_pool2d_with_indices}
)), InputBuffer(name='primals_10', layout=FixedLayout('cuda', torch.float32, size=[58, 24, 1, 1], stride=[24, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_3}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf9', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 56, 56]), stride=[181888, 3136, 56, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf8, i3 + 56 * i2 + 3136 * i1 + 181888 * i0) - load(primals_180, i1) * reciprocal(sqrt(load(primals_181, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_11, i1) + load(primals_12, i1)),
ranges=torch.Size([2, 58, 56, 56]),
origins={relu_2}
))
)), TensorBox(StorageBox(
Convolution(
name=buf10,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf9', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 56, 56]), stride=[181888, 3136, 56, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf8, i3 + 56 * i2 + 3136 * i1 + 181888 * i0) - load(primals_180, i1) * reciprocal(sqrt(load(primals_181, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_11, i1) + load(primals_12, i1)),
ranges=torch.Size([2, 58, 56, 56]),
origins={relu_2}
)), InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 58),
kwargs={},
output_view=None,
origins={convolution_4}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf11', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf10, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_183, i1) * reciprocal(sqrt(load(primals_184, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_14, i1) + load(primals_15, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_9}
))
)), TensorBox(StorageBox(
Convolution(
name=buf12,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf11', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf10, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_183, i1) * reciprocal(sqrt(load(primals_184, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_14, i1) + load(primals_15, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_9}
)), InputBuffer(name='primals_16', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_5}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf15', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf14, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone}
))
),
size=(2, 116, 28, 28),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 58), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_1}
),
size=[2, 58, 28, 28],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 58, i2, i3],
origins={split}
)
), TensorBox(StorageBox(
Convolution(
name=buf16,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf15', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf14, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone}
))
),
size=(2, 116, 28, 28),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 58), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_1}
),
FixedLayout('cuda', torch.float32, size=[2, 58, 28, 28], stride=[90944, 784, 28, 1], offset=45472),
origins={convolution_6}
), InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_6}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf17', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf16, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_189, i1) * reciprocal(sqrt(load(primals_190, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_20, i1) + load(primals_21, i1)),
ranges=torch.Size([2, 58, 28, 28]),
origins={relu_4}
))
)), TensorBox(StorageBox(
Convolution(
name=buf18,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf17', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf16, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_189, i1) * reciprocal(sqrt(load(primals_190, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_20, i1) + load(primals_21, i1)),
ranges=torch.Size([2, 58, 28, 28]),
origins={relu_4}
)), InputBuffer(name='primals_22', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 58),
kwargs={},
output_view=None,
origins={convolution_7}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf19', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf18, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_192, i1) * reciprocal(sqrt(load(primals_193, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_23, i1) + load(primals_24, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_15}
))
)), TensorBox(StorageBox(
Convolution(
name=buf20,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf19', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf18, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_192, i1) * reciprocal(sqrt(load(primals_193, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_23, i1) + load(primals_24, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_15}
)), InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_8}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf24', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf23, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone_1}
))
),
size=(2, 116, 28, 28),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 58), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_3}
),
size=[2, 58, 28, 28],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 58, i2, i3],
origins={split_1}
)
), TensorBox(StorageBox(
Convolution(
name=buf25,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf24', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf23, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone_1}
))
),
size=(2, 116, 28, 28),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 58), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_3}
),
FixedLayout('cuda', torch.float32, size=[2, 58, 28, 28], stride=[90944, 784, 28, 1], offset=45472),
origins={convolution_9}
), InputBuffer(name='primals_28', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_9}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf26', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf25, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_198, i1) * reciprocal(sqrt(load(primals_199, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_29, i1) + load(primals_30, i1)),
ranges=torch.Size([2, 58, 28, 28]),
origins={relu_6}
))
)), TensorBox(StorageBox(
Convolution(
name=buf27,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf26', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf25, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_198, i1) * reciprocal(sqrt(load(primals_199, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_29, i1) + load(primals_30, i1)),
ranges=torch.Size([2, 58, 28, 28]),
origins={relu_6}
)), InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 58),
kwargs={},
output_view=None,
origins={convolution_10}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf28', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf27, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_201, i1) * reciprocal(sqrt(load(primals_202, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_32, i1) + load(primals_33, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_21}
))
)), TensorBox(StorageBox(
Convolution(
name=buf29,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf28', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf27, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_201, i1) * reciprocal(sqrt(load(primals_202, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_32, i1) + load(primals_33, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_21}
)), InputBuffer(name='primals_34', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_11}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf33', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf32, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone_2}
))
),
size=(2, 116, 28, 28),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 58), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_5}
),
size=[2, 58, 28, 28],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 58, i2, i3],
origins={split_2}
)
), TensorBox(StorageBox(
Convolution(
name=buf34,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf33', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf32, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone_2}
))
),
size=(2, 116, 28, 28),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 58), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_5}
),
FixedLayout('cuda', torch.float32, size=[2, 58, 28, 28], stride=[90944, 784, 28, 1], offset=45472),
origins={convolution_12}
), InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_12}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf35', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf34, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_207, i1) * reciprocal(sqrt(load(primals_208, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_38, i1) + load(primals_39, i1)),
ranges=torch.Size([2, 58, 28, 28]),
origins={relu_8}
))
)), TensorBox(StorageBox(
Convolution(
name=buf36,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf35', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf34, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_207, i1) * reciprocal(sqrt(load(primals_208, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_38, i1) + load(primals_39, i1)),
ranges=torch.Size([2, 58, 28, 28]),
origins={relu_8}
)), InputBuffer(name='primals_40', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 58),
kwargs={},
output_view=None,
origins={convolution_13}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf37', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf36, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_210, i1) * reciprocal(sqrt(load(primals_211, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_41, i1) + load(primals_42, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_27}
))
)), TensorBox(StorageBox(
Convolution(
name=buf38,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf37', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf36, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_210, i1) * reciprocal(sqrt(load(primals_211, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_41, i1) + load(primals_42, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_27}
)), InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_14}
)
)), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf42', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf41, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone_3}
))
),
size=(2, 116, 28, 28),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 58), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_7}
)
), TensorBox(StorageBox(
Convolution(
name=buf43,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
StorageBox(
ComputedBuffer(name='buf42', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf41, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone_3}
))
),
FixedLayout('cuda', torch.float32, size=(2, 116, 28, 28), stride=[90944, 784, 28, 1]),
origins={convolution_15}
), InputBuffer(name='primals_46', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_15}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf44', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf43, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_216, i1) * reciprocal(sqrt(load(primals_217, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_47, i1) + load(primals_48, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_31}
))
)), TensorBox(StorageBox(
Convolution(
name=buf45,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf44', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf43, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_216, i1) * reciprocal(sqrt(load(primals_217, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_47, i1) + load(primals_48, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_31}
)), InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_16}
)
)), TensorBox(StorageBox(
Convolution(
name=buf47,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 28, 28]), stride=[90944, 784, 28, 1]),
inputs=[ReinterpretView(
StorageBox(
ComputedBuffer(name='buf42', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf41, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone_3}
))
),
FixedLayout('cuda', torch.float32, size=(2, 116, 28, 28), stride=[90944, 784, 28, 1]),
origins={convolution_17}
), InputBuffer(name='primals_52', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_17}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf48', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 28, 28]), stride=[90944, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf47, i3 + 28 * i2 + 784 * i1 + 90944 * i0) - load(primals_222, i1) * reciprocal(sqrt(load(primals_223, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_53, i1) + load(primals_54, i1)),
ranges=torch.Size([2, 116, 28, 28]),
origins={relu_11}
))
)), TensorBox(StorageBox(
Convolution(
name=buf49,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf48', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 28, 28]), stride=[90944, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf47, i3 + 28 * i2 + 784 * i1 + 90944 * i0) - load(primals_222, i1) * reciprocal(sqrt(load(primals_223, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_53, i1) + load(primals_54, i1)),
ranges=torch.Size([2, 116, 28, 28]),
origins={relu_11}
)), InputBuffer(name='primals_55', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_18}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf50', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf49, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_225, i1) * reciprocal(sqrt(load(primals_226, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_56, i1) + load(primals_57, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_37}
))
)), TensorBox(StorageBox(
Convolution(
name=buf51,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf50', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf49, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_225, i1) * reciprocal(sqrt(load(primals_226, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_56, i1) + load(primals_57, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_37}
)), InputBuffer(name='primals_58', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_19}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf54', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf53, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_4}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_9}
),
size=[2, 116, 14, 14],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 116, i2, i3],
origins={split_3}
)
), TensorBox(StorageBox(
Convolution(
name=buf55,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf54', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf53, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_4}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_9}
),
FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[45472, 196, 14, 1], offset=22736),
origins={convolution_20}
), InputBuffer(name='primals_61', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_20}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf56', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf55, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_231, i1) * reciprocal(sqrt(load(primals_232, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_62, i1) + load(primals_63, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_13}
))
)), TensorBox(StorageBox(
Convolution(
name=buf57,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf56', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf55, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_231, i1) * reciprocal(sqrt(load(primals_232, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_62, i1) + load(primals_63, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_13}
)), InputBuffer(name='primals_64', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_21}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf58', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf57, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_234, i1) * reciprocal(sqrt(load(primals_235, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_65, i1) + load(primals_66, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_43}
))
)), TensorBox(StorageBox(
Convolution(
name=buf59,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf58', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf57, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_234, i1) * reciprocal(sqrt(load(primals_235, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_65, i1) + load(primals_66, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_43}
)), InputBuffer(name='primals_67', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_22}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf63', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf62, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_5}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_11}
),
size=[2, 116, 14, 14],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 116, i2, i3],
origins={split_4}
)
), TensorBox(StorageBox(
Convolution(
name=buf64,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf63', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf62, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_5}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_11}
),
FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[45472, 196, 14, 1], offset=22736),
origins={convolution_23}
), InputBuffer(name='primals_70', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_23}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf65', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf64, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_240, i1) * reciprocal(sqrt(load(primals_241, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_71, i1) + load(primals_72, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_15}
))
)), TensorBox(StorageBox(
Convolution(
name=buf66,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf65', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf64, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_240, i1) * reciprocal(sqrt(load(primals_241, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_71, i1) + load(primals_72, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_15}
)), InputBuffer(name='primals_73', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_24}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf67', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf66, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_243, i1) * reciprocal(sqrt(load(primals_244, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_74, i1) + load(primals_75, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_49}
))
)), TensorBox(StorageBox(
Convolution(
name=buf68,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf67', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf66, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_243, i1) * reciprocal(sqrt(load(primals_244, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_74, i1) + load(primals_75, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_49}
)), InputBuffer(name='primals_76', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_25}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf72', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf71, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_6}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_13}
),
size=[2, 116, 14, 14],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 116, i2, i3],
origins={split_5}
)
), TensorBox(StorageBox(
Convolution(
name=buf73,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf72', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf71, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_6}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_13}
),
FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[45472, 196, 14, 1], offset=22736),
origins={convolution_26}
), InputBuffer(name='primals_79', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_26}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf74', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf73, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_249, i1) * reciprocal(sqrt(load(primals_250, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_80, i1) + load(primals_81, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_17}
))
)), TensorBox(StorageBox(
Convolution(
name=buf75,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf74', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf73, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_249, i1) * reciprocal(sqrt(load(primals_250, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_80, i1) + load(primals_81, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_17}
)), InputBuffer(name='primals_82', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_27}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf76', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf75, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_252, i1) * reciprocal(sqrt(load(primals_253, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_83, i1) + load(primals_84, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_55}
))
)), TensorBox(StorageBox(
Convolution(
name=buf77,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf76', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf75, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_252, i1) * reciprocal(sqrt(load(primals_253, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_83, i1) + load(primals_84, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_55}
)), InputBuffer(name='primals_85', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_28}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf81', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf80, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_7}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_15}
),
size=[2, 116, 14, 14],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 116, i2, i3],
origins={split_6}
)
), TensorBox(StorageBox(
Convolution(
name=buf82,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf81', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf80, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_7}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_15}
),
FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[45472, 196, 14, 1], offset=22736),
origins={convolution_29}
), InputBuffer(name='primals_88', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_29}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf83', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf82, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_258, i1) * reciprocal(sqrt(load(primals_259, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_89, i1) + load(primals_90, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_19}
))
)), TensorBox(StorageBox(
Convolution(
name=buf84,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf83', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf82, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_258, i1) * reciprocal(sqrt(load(primals_259, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_89, i1) + load(primals_90, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_19}
)), InputBuffer(name='primals_91', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_30}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf85', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf84, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_261, i1) * reciprocal(sqrt(load(primals_262, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_92, i1) + load(primals_93, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_61}
))
)), TensorBox(StorageBox(
Convolution(
name=buf86,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf85', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf84, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_261, i1) * reciprocal(sqrt(load(primals_262, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_92, i1) + load(primals_93, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_61}
)), InputBuffer(name='primals_94', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_31}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf90', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf89, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_8}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_17}
),
size=[2, 116, 14, 14],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 116, i2, i3],
origins={split_7}
)
), TensorBox(StorageBox(
Convolution(
name=buf91,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf90', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf89, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_8}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_17}
),
FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[45472, 196, 14, 1], offset=22736),
origins={convolution_32}
), InputBuffer(name='primals_97', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_32}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf92', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf91, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_267, i1) * reciprocal(sqrt(load(primals_268, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_98, i1) + load(primals_99, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_21}
))
)), TensorBox(StorageBox(
Convolution(
name=buf93,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf92', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf91, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_267, i1) * reciprocal(sqrt(load(primals_268, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_98, i1) + load(primals_99, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_21}
)), InputBuffer(name='primals_100', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_33}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf94', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf93, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_270, i1) * reciprocal(sqrt(load(primals_271, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_101, i1) + load(primals_102, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_67}
))
)), TensorBox(StorageBox(
Convolution(
name=buf95,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf94', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf93, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_270, i1) * reciprocal(sqrt(load(primals_271, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_101, i1) + load(primals_102, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_67}
)), InputBuffer(name='primals_103', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_34}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf99', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf98, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_9}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_19}
),
size=[2, 116, 14, 14],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 116, i2, i3],
origins={split_8}
)
), TensorBox(StorageBox(
Convolution(
name=buf100,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf99', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf98, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_9}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_19}
),
FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[45472, 196, 14, 1], offset=22736),
origins={convolution_35}
), InputBuffer(name='primals_106', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_35}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf101', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf100, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_276, i1) * reciprocal(sqrt(load(primals_277, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_107, i1) + load(primals_108, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_23}
))
)), TensorBox(StorageBox(
Convolution(
name=buf102,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf101', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf100, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_276, i1) * reciprocal(sqrt(load(primals_277, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_107, i1) + load(primals_108, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_23}
)), InputBuffer(name='primals_109', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_36}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf103', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf102, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_279, i1) * reciprocal(sqrt(load(primals_280, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_110, i1) + load(primals_111, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_73}
))
)), TensorBox(StorageBox(
Convolution(
name=buf104,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf103', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf102, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_279, i1) * reciprocal(sqrt(load(primals_280, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_110, i1) + load(primals_111, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_73}
)), InputBuffer(name='primals_112', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_37}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf108', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf107, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_10}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_21}
),
size=[2, 116, 14, 14],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 116, i2, i3],
origins={split_9}
)
), TensorBox(StorageBox(
Convolution(
name=buf109,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf108', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf107, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_10}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_21}
),
FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[45472, 196, 14, 1], offset=22736),
origins={convolution_38}
), InputBuffer(name='primals_115', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_38}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf110', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf109, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_285, i1) * reciprocal(sqrt(load(primals_286, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_116, i1) + load(primals_117, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_25}
))
)), TensorBox(StorageBox(
Convolution(
name=buf111,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf110', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf109, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_285, i1) * reciprocal(sqrt(load(primals_286, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_116, i1) + load(primals_117, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_25}
)), InputBuffer(name='primals_118', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_39}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf112', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf111, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_288, i1) * reciprocal(sqrt(load(primals_289, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_119, i1) + load(primals_120, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_79}
))
)), TensorBox(StorageBox(
Convolution(
name=buf113,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf112', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf111, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_288, i1) * reciprocal(sqrt(load(primals_289, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_119, i1) + load(primals_120, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_79}
)), InputBuffer(name='primals_121', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_40}
)
)), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf117', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf116, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_11}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_23}
)
), TensorBox(StorageBox(
Convolution(
name=buf118,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ReinterpretView(
StorageBox(
ComputedBuffer(name='buf117', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf116, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_11}
))
),
FixedLayout('cuda', torch.float32, size=(2, 232, 14, 14), stride=[45472, 196, 14, 1]),
origins={convolution_41}
), InputBuffer(name='primals_124', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 232),
kwargs={},
output_view=None,
origins={convolution_41}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf119', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf118, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_294, i1) * reciprocal(sqrt(load(primals_295, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_125, i1) + load(primals_126, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_83}
))
)), TensorBox(StorageBox(
Convolution(
name=buf120,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf119', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf118, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_294, i1) * reciprocal(sqrt(load(primals_295, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_125, i1) + load(primals_126, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_83}
)), InputBuffer(name='primals_127', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_42}
)
)), TensorBox(StorageBox(
Convolution(
name=buf122,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 14, 14]), stride=[45472, 196, 14, 1]),
inputs=[ReinterpretView(
StorageBox(
ComputedBuffer(name='buf117', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf116, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_11}
))
),
FixedLayout('cuda', torch.float32, size=(2, 232, 14, 14), stride=[45472, 196, 14, 1]),
origins={convolution_43}
), InputBuffer(name='primals_130', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_43}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf123', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 14, 14]), stride=[45472, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf122, i3 + 14 * i2 + 196 * i1 + 45472 * i0) - load(primals_300, i1) * reciprocal(sqrt(load(primals_301, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_131, i1) + load(primals_132, i1)),
ranges=torch.Size([2, 232, 14, 14]),
origins={relu_28}
))
)), TensorBox(StorageBox(
Convolution(
name=buf124,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf123', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 14, 14]), stride=[45472, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf122, i3 + 14 * i2 + 196 * i1 + 45472 * i0) - load(primals_300, i1) * reciprocal(sqrt(load(primals_301, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_131, i1) + load(primals_132, i1)),
ranges=torch.Size([2, 232, 14, 14]),
origins={relu_28}
)), InputBuffer(name='primals_133', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 232),
kwargs={},
output_view=None,
origins={convolution_44}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf125', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf124, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_303, i1) * reciprocal(sqrt(load(primals_304, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_134, i1) + load(primals_135, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_89}
))
)), TensorBox(StorageBox(
Convolution(
name=buf126,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf125', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf124, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_303, i1) * reciprocal(sqrt(load(primals_304, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_134, i1) + load(primals_135, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_89}
)), InputBuffer(name='primals_136', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_45}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf129', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf128, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_12}
))
),
size=(2, 464, 7, 7),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 232), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_25}
),
size=[2, 232, 7, 7],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 232, i2, i3],
origins={split_10}
)
), TensorBox(StorageBox(
Convolution(
name=buf130,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf129', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf128, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_12}
))
),
size=(2, 464, 7, 7),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 232), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_25}
),
FixedLayout('cuda', torch.float32, size=[2, 232, 7, 7], stride=[22736, 49, 7, 1], offset=11368),
origins={convolution_46}
), InputBuffer(name='primals_139', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_46}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf131', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf130, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_309, i1) * reciprocal(sqrt(load(primals_310, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_140, i1) + load(primals_141, i1)),
ranges=torch.Size([2, 232, 7, 7]),
origins={relu_30}
))
)), TensorBox(StorageBox(
Convolution(
name=buf132,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf131', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf130, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_309, i1) * reciprocal(sqrt(load(primals_310, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_140, i1) + load(primals_141, i1)),
ranges=torch.Size([2, 232, 7, 7]),
origins={relu_30}
)), InputBuffer(name='primals_142', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 232),
kwargs={},
output_view=None,
origins={convolution_47}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf133', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf132, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_312, i1) * reciprocal(sqrt(load(primals_313, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_143, i1) + load(primals_144, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_95}
))
)), TensorBox(StorageBox(
Convolution(
name=buf134,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf133', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf132, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_312, i1) * reciprocal(sqrt(load(primals_313, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_143, i1) + load(primals_144, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_95}
)), InputBuffer(name='primals_145', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_48}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf138', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf137, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_13}
))
),
size=(2, 464, 7, 7),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 232), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_27}
),
size=[2, 232, 7, 7],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 232, i2, i3],
origins={split_11}
)
), TensorBox(StorageBox(
Convolution(
name=buf139,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf138', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf137, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_13}
))
),
size=(2, 464, 7, 7),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 232), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_27}
),
FixedLayout('cuda', torch.float32, size=[2, 232, 7, 7], stride=[22736, 49, 7, 1], offset=11368),
origins={convolution_49}
), InputBuffer(name='primals_148', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_49}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf140', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf139, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_318, i1) * reciprocal(sqrt(load(primals_319, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_149, i1) + load(primals_150, i1)),
ranges=torch.Size([2, 232, 7, 7]),
origins={relu_32}
))
)), TensorBox(StorageBox(
Convolution(
name=buf141,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf140', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf139, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_318, i1) * reciprocal(sqrt(load(primals_319, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_149, i1) + load(primals_150, i1)),
ranges=torch.Size([2, 232, 7, 7]),
origins={relu_32}
)), InputBuffer(name='primals_151', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 232),
kwargs={},
output_view=None,
origins={convolution_50}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf142', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf141, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_321, i1) * reciprocal(sqrt(load(primals_322, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_152, i1) + load(primals_153, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_101}
))
)), TensorBox(StorageBox(
Convolution(
name=buf143,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf142', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf141, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_321, i1) * reciprocal(sqrt(load(primals_322, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_152, i1) + load(primals_153, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_101}
)), InputBuffer(name='primals_154', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_51}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf147', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf146, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_14}
))
),
size=(2, 464, 7, 7),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 232), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_29}
),
size=[2, 232, 7, 7],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 232, i2, i3],
origins={split_12}
)
), TensorBox(StorageBox(
Convolution(
name=buf148,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf147', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf146, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_14}
))
),
size=(2, 464, 7, 7),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 232), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_29}
),
FixedLayout('cuda', torch.float32, size=[2, 232, 7, 7], stride=[22736, 49, 7, 1], offset=11368),
origins={convolution_52}
), InputBuffer(name='primals_157', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_52}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf149', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf148, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_327, i1) * reciprocal(sqrt(load(primals_328, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_158, i1) + load(primals_159, i1)),
ranges=torch.Size([2, 232, 7, 7]),
origins={relu_34}
))
)), TensorBox(StorageBox(
Convolution(
name=buf150,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf149', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf148, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_327, i1) * reciprocal(sqrt(load(primals_328, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_158, i1) + load(primals_159, i1)),
ranges=torch.Size([2, 232, 7, 7]),
origins={relu_34}
)), InputBuffer(name='primals_160', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 232),
kwargs={},
output_view=None,
origins={convolution_53}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf151', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf150, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_330, i1) * reciprocal(sqrt(load(primals_331, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_161, i1) + load(primals_162, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_107}
))
)), TensorBox(StorageBox(
Convolution(
name=buf152,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf151', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf150, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_330, i1) * reciprocal(sqrt(load(primals_331, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_161, i1) + load(primals_162, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_107}
)), InputBuffer(name='primals_163', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_54}
)
)), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf156', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf155, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_15}
))
),
size=(2, 464, 7, 7),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 232), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_31}
)
), TensorBox(StorageBox(
Convolution(
name=buf157,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 1024, 7, 7]), stride=[50176, 49, 7, 1]),
inputs=[ReinterpretView(
StorageBox(
ComputedBuffer(name='buf156', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf155, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_15}
))
),
FixedLayout('cuda', torch.float32, size=(2, 464, 7, 7), stride=[22736, 49, 7, 1]),
origins={convolution_55}
), InputBuffer(name='primals_166', layout=FixedLayout('cuda', torch.float32, size=[1024, 464, 1, 1], stride=[464, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_55}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf160', layout=FlexibleLayout('cuda', torch.float32, size=[2, 1024], stride=[1024, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf159, i1 + 1024 * i0) / index_expr(49, torch.float32),
ranges=[2, 1024],
origins={mean}
))
)), TensorBox(
ReinterpretView(
StorageBox(
InputBuffer(name='primals_169', layout=FixedLayout('cuda', torch.float32, size=[1000, 1024], stride=[1024, 1]))
),
FixedLayout('cuda', torch.float32, size=[1000, 1024], stride=[1024, 1]),
origins={permute_17}
)
), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf158, i3 + 7 * i2 + 49 * i1 + 50176 * i0) <= constant(0, torch.float32),
ranges=torch.Size([2, 1024, 7, 7]),
origins={le}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf153, i3 + 7 * i2 + 49 * i1 + 22736 * i0) <= constant(0, torch.float32),
ranges=[2, 232, 7, 7],
origins={le_1}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf144, i3 + 7 * i2 + 49 * i1 + 22736 * i0) <= constant(0, torch.float32),
ranges=[2, 232, 7, 7],
origins={le_3}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf135, i3 + 7 * i2 + 49 * i1 + 22736 * i0) <= constant(0, torch.float32),
ranges=[2, 232, 7, 7],
origins={le_5}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf127, i3 + 7 * i2 + 49 * i1 + 22736 * i0) <= constant(0, torch.float32),
ranges=[2, 232, 7, 7],
origins={le_7}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf121, i3 + 7 * i2 + 49 * i1 + 22736 * i0) <= constant(0, torch.float32),
ranges=[2, 232, 7, 7],
origins={le_9}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf114, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_10}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf105, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_12}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf96, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_14}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf87, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_16}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf78, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_18}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf69, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_20}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf60, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_22}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf52, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_24}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf46, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_26}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf39, i3 + 28 * i2 + 784 * i1 + 90944 * i0) <= constant(0, torch.float32),
ranges=[2, 58, 28, 28],
origins={le_27}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf30, i3 + 28 * i2 + 784 * i1 + 90944 * i0) <= constant(0, torch.float32),
ranges=[2, 58, 28, 28],
origins={le_29}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf21, i3 + 28 * i2 + 784 * i1 + 90944 * i0) <= constant(0, torch.float32),
ranges=[2, 58, 28, 28],
origins={le_31}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf13, i3 + 28 * i2 + 784 * i1 + 90944 * i0) <= constant(0, torch.float32),
ranges=[2, 58, 28, 28],
origins={le_33}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf7, i3 + 28 * i2 + 784 * i1 + 90944 * i0) <= constant(0, torch.float32),
ranges=[2, 58, 28, 28],
origins={le_35}
)
)), s0, 28, 28, 14, 14, 7, 7]
While executing return [addmm, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_157, primals_158, primals_160, primals_161, primals_163, primals_164, primals_166, primals_167, primals_171, primals_172, primals_174, primals_175, primals_177, primals_178, primals_180, primals_181, primals_183, primals_184, primals_186, primals_187, primals_189, primals_190, primals_192, primals_193, primals_195, primals_196, primals_198, primals_199, primals_201, primals_202, primals_204, primals_205, primals_207, primals_208, primals_210, primals_211, primals_213, primals_214, primals_216, primals_217, primals_219, primals_220, primals_222, primals_223, primals_225, primals_226, primals_228, primals_229, primals_231, primals_232, primals_234, primals_235, primals_237, primals_238, primals_240, primals_241, primals_243, primals_244, primals_246, primals_247, primals_249, primals_250, primals_252, primals_253, primals_255, primals_256, primals_258, primals_259, primals_261, primals_262, primals_264, primals_265, primals_267, primals_268, primals_270, primals_271, primals_273, primals_274, primals_276, primals_277, primals_279, primals_280, primals_282, primals_283, primals_285, primals_286, primals_288, primals_289, primals_291, primals_292, primals_294, primals_295, primals_297, primals_298, primals_300, primals_301, primals_303, primals_304, primals_306, primals_307, primals_309, primals_310, primals_312, primals_313, primals_315, primals_316, primals_318, primals_319, primals_321, primals_322, primals_324, primals_325, primals_327, primals_328, primals_330, primals_331, primals_333, primals_334, primals_336, primals_337, primals_339, convolution, relu, getitem, getitem_1, convolution_1, add_3, convolution_2, convolution_3, relu_2, convolution_4, add_9, convolution_5, getitem_3, convolution_6, relu_4, convolution_7, add_15, convolution_8, getitem_5, convolution_9, relu_6, convolution_10, add_21, convolution_11, getitem_7, convolution_12, relu_8, convolution_13, add_27, convolution_14, view_7, convolution_15, add_31, convolution_16, convolution_17, relu_11, convolution_18, add_37, convolution_19, getitem_9, convolution_20, relu_13, convolution_21, add_43, convolution_22, getitem_11, convolution_23, relu_15, convolution_24, add_49, convolution_25, getitem_13, convolution_26, relu_17, convolution_27, add_55, convolution_28, getitem_15, convolution_29, relu_19, convolution_30, add_61, convolution_31, getitem_17, convolution_32, relu_21, convolution_33, add_67, convolution_34, getitem_19, convolution_35, relu_23, convolution_36, add_73, convolution_37, getitem_21, convolution_38, relu_25, convolution_39, add_79, convolution_40, view_23, convolution_41, add_83, convolution_42, convolution_43, relu_28, convolution_44, add_89, convolution_45, getitem_23, convolution_46, relu_30, convolution_47, add_95, convolution_48, getitem_25, convolution_49, relu_32, convolution_50, add_101, convolution_51, getitem_27, convolution_52, relu_34, convolution_53, add_107, convolution_54, view_31, convolution_55, mean, permute_17, le, le_1, le_3, le_5, le_7, le_9, le_10, le_12, le_14, le_16, le_18, le_20, le_22, le_24, le_26, le_27, le_29, le_31, le_33, le_35, sym_size, sym_size_1, sym_size_2, sym_size_3, sym_size_4, sym_size_5, sym_size_6]
Original traceback:
None
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchvision/torchvision/models/shufflenetv2.py", line 165, in forward
def forward(self, x: Tensor) -> Tensor:
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 516, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 351, in fw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 267, in output
assert all(
AssertionError: [TensorBox(StorageBox(
MatrixMultiplyAdd(
name=buf161,
layout=FlexibleLayout('cuda', torch.float32, size=[2, 1000], stride=[1000, 1]),
inputs=[InputBuffer(name='primals_170', layout=FixedLayout('cuda', torch.float32, size=[1000], stride=[1])), ComputedBuffer(name='buf160', layout=FlexibleLayout('cuda', torch.float32, size=[2, 1024], stride=[1024, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf159, i1 + 1024 * i0) / index_expr(49, torch.float32),
ranges=[2, 1024],
origins={mean}
)), ReinterpretView(
StorageBox(
InputBuffer(name='primals_169', layout=FixedLayout('cuda', torch.float32, size=[1000, 1024], stride=[1024, 1]))
),
FixedLayout('cuda', torch.float32, size=[1024, 1000], stride=[1, 1024]),
origins={permute_16}
)],
constant_args=(),
kwargs={'beta': 1, 'alpha': 1},
output_view=None,
origins={addmm}
)
)), TensorBox(StorageBox(
InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[24, 3, 3, 3], stride=[27, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_2', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_4', layout=FixedLayout('cuda', torch.float32, size=[24, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_5', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[58, 24, 1, 1], stride=[24, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_8', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_10', layout=FixedLayout('cuda', torch.float32, size=[58, 24, 1, 1], stride=[24, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_11', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_14', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_16', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_17', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_20', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_22', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_23', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_26', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_28', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_29', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_32', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_34', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_35', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_38', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_40', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_41', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_44', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_46', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_47', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_50', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_52', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_53', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_55', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_56', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_58', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_59', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_61', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_62', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_64', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_65', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_67', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_68', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_70', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_71', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_73', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_74', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_76', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_77', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_79', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_80', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_82', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_83', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_85', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_86', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_88', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_89', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_91', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_92', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_94', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_95', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_97', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_98', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_100', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_101', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_103', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_104', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_106', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_107', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_109', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_110', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_112', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_113', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_115', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_116', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_118', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_119', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_121', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_122', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_124', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_125', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_127', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_128', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_130', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_131', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_133', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_134', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_136', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_137', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_139', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_140', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_142', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_143', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_145', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_146', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_148', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_149', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_151', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_152', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_154', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_155', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_157', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_158', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_160', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_161', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_163', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_164', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_166', layout=FixedLayout('cuda', torch.float32, size=[1024, 464, 1, 1], stride=[464, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_167', layout=FixedLayout('cuda', torch.float32, size=[1024], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_171', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_172', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_174', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_175', layout=FixedLayout('cuda', torch.float32, size=[24], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_177', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_178', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_180', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_181', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_183', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_184', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_186', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_187', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_189', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_190', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_192', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_193', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_195', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_196', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_198', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_199', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_201', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_202', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_204', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_205', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_207', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_208', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_210', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_211', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_213', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_214', layout=FixedLayout('cuda', torch.float32, size=[58], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_216', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_217', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_219', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_220', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_222', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_223', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_225', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_226', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_228', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_229', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_231', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_232', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_234', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_235', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_237', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_238', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_240', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_241', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_243', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_244', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_246', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_247', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_249', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_250', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_252', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_253', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_255', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_256', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_258', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_259', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_261', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_262', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_264', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_265', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_267', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_268', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_270', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_271', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_273', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_274', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_276', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_277', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_279', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_280', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_282', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_283', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_285', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_286', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_288', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_289', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_291', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_292', layout=FixedLayout('cuda', torch.float32, size=[116], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_294', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_295', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_297', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_298', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_300', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_301', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_303', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_304', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_306', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_307', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_309', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_310', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_312', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_313', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_315', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_316', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_318', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_319', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_321', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_322', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_324', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_325', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_327', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_328', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_330', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_331', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_333', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_334', layout=FixedLayout('cuda', torch.float32, size=[232], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_336', layout=FixedLayout('cuda', torch.float32, size=[1024], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_337', layout=FixedLayout('cuda', torch.float32, size=[1024], stride=[1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_339', layout=FixedLayout('cuda', torch.float32, size=[s0, 3, s2, s2], stride=[3*s2**2, s2**2, s2, 1]))
)), TensorBox(StorageBox(
Convolution(
name=buf0,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 24, 112, 112]), stride=[301056, 12544, 112, 1]),
inputs=[InputBuffer(name='primals_339', layout=FixedLayout('cuda', torch.float32, size=[s0, 3, s2, s2], stride=[3*s2**2, s2**2, s2, 1])), InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[24, 3, 3, 3], stride=[27, 9, 3, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf1', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 24, 112, 112]), stride=[301056, 12544, 112, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf0, i3 + 112 * i2 + 12544 * i1 + 301056 * i0) - load(primals_171, i1) * reciprocal(sqrt(load(primals_172, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_2, i1) + load(primals_3, i1)),
ranges=torch.Size([2, 24, 112, 112]),
origins={relu}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[2, 24, 56, 56], stride=[75264, 3136, 56, 1]), data=Pointwise(
'cuda',
torch.float32,
maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))))))))),
ranges=[2, 24, 56, 56],
origins={max_pool2d_with_indices}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf3', layout=FlexibleLayout('cuda', torch.int64, size=[2, 24, 56, 56], stride=[75264, 3136, 56, 1]), data=Pointwise(
'cuda',
torch.int64,
where(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf)))))))), index_expr(113 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))))))), index_expr(112 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf)))))), index_expr(111 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))))), index_expr(1 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf)))), index_expr(2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))), index_expr(-1 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf)), index_expr(-111 + 2 * i3 + 224 * i2, torch.int64), where(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf) > masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), index_expr(-112 + 2 * i3 + 224 * i2, torch.int64), index_expr(-113 + 2 * i3 + 224 * i2, torch.int64))))))))),
ranges=[2, 24, 56, 56],
origins={max_pool2d_with_indices}
))
)), TensorBox(StorageBox(
Convolution(
name=buf4,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 24, 28, 28]), stride=[18816, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[2, 24, 56, 56], stride=[75264, 3136, 56, 1]), data=Pointwise(
'cuda',
torch.float32,
maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))))))))),
ranges=[2, 24, 56, 56],
origins={max_pool2d_with_indices}
)), InputBuffer(name='primals_4', layout=FixedLayout('cuda', torch.float32, size=[24, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 24),
kwargs={},
output_view=None,
origins={convolution_1}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf5', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 24, 28, 28]), stride=[18816, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf4, i3 + 28 * i2 + 784 * i1 + 18816 * i0) - load(primals_174, i1) * reciprocal(sqrt(load(primals_175, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_5, i1) + load(primals_6, i1),
ranges=torch.Size([2, 24, 28, 28]),
origins={add_3}
))
)), TensorBox(StorageBox(
Convolution(
name=buf6,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf5', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 24, 28, 28]), stride=[18816, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf4, i3 + 28 * i2 + 784 * i1 + 18816 * i0) - load(primals_174, i1) * reciprocal(sqrt(load(primals_175, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_5, i1) + load(primals_6, i1),
ranges=torch.Size([2, 24, 28, 28]),
origins={add_3}
)), InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[58, 24, 1, 1], stride=[24, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_2}
)
)), TensorBox(StorageBox(
Convolution(
name=buf8,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 56, 56]), stride=[181888, 3136, 56, 1]),
inputs=[ComputedBuffer(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[2, 24, 56, 56], stride=[75264, 3136, 56, 1]), data=Pointwise(
'cuda',
torch.float32,
maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -1 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -111 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), maximum(masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -112 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf), masked(index_expr(-1 + 2 * i2, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i2, torch.int64) < index_expr(112, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) >= index_expr(0, torch.int64) & index_expr(-1 + 2 * i3, torch.int64) < index_expr(112, torch.int64), load(buf1, -113 + 2 * i3 + 224 * i2 + 12544 * i1 + 301056 * i0), -inf))))))))),
ranges=[2, 24, 56, 56],
origins={max_pool2d_with_indices}
)), InputBuffer(name='primals_10', layout=FixedLayout('cuda', torch.float32, size=[58, 24, 1, 1], stride=[24, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_3}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf9', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 56, 56]), stride=[181888, 3136, 56, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf8, i3 + 56 * i2 + 3136 * i1 + 181888 * i0) - load(primals_180, i1) * reciprocal(sqrt(load(primals_181, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_11, i1) + load(primals_12, i1)),
ranges=torch.Size([2, 58, 56, 56]),
origins={relu_2}
))
)), TensorBox(StorageBox(
Convolution(
name=buf10,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf9', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 56, 56]), stride=[181888, 3136, 56, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf8, i3 + 56 * i2 + 3136 * i1 + 181888 * i0) - load(primals_180, i1) * reciprocal(sqrt(load(primals_181, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_11, i1) + load(primals_12, i1)),
ranges=torch.Size([2, 58, 56, 56]),
origins={relu_2}
)), InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 58),
kwargs={},
output_view=None,
origins={convolution_4}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf11', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf10, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_183, i1) * reciprocal(sqrt(load(primals_184, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_14, i1) + load(primals_15, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_9}
))
)), TensorBox(StorageBox(
Convolution(
name=buf12,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf11', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf10, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_183, i1) * reciprocal(sqrt(load(primals_184, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_14, i1) + load(primals_15, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_9}
)), InputBuffer(name='primals_16', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_5}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf15', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf14, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone}
))
),
size=(2, 116, 28, 28),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 58), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_1}
),
size=[2, 58, 28, 28],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 58, i2, i3],
origins={split}
)
), TensorBox(StorageBox(
Convolution(
name=buf16,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf15', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf14, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone}
))
),
size=(2, 116, 28, 28),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 58), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_1}
),
FixedLayout('cuda', torch.float32, size=[2, 58, 28, 28], stride=[90944, 784, 28, 1], offset=45472),
origins={convolution_6}
), InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_6}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf17', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf16, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_189, i1) * reciprocal(sqrt(load(primals_190, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_20, i1) + load(primals_21, i1)),
ranges=torch.Size([2, 58, 28, 28]),
origins={relu_4}
))
)), TensorBox(StorageBox(
Convolution(
name=buf18,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf17', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf16, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_189, i1) * reciprocal(sqrt(load(primals_190, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_20, i1) + load(primals_21, i1)),
ranges=torch.Size([2, 58, 28, 28]),
origins={relu_4}
)), InputBuffer(name='primals_22', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 58),
kwargs={},
output_view=None,
origins={convolution_7}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf19', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf18, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_192, i1) * reciprocal(sqrt(load(primals_193, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_23, i1) + load(primals_24, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_15}
))
)), TensorBox(StorageBox(
Convolution(
name=buf20,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf19', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf18, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_192, i1) * reciprocal(sqrt(load(primals_193, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_23, i1) + load(primals_24, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_15}
)), InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_8}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf24', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf23, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone_1}
))
),
size=(2, 116, 28, 28),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 58), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_3}
),
size=[2, 58, 28, 28],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 58, i2, i3],
origins={split_1}
)
), TensorBox(StorageBox(
Convolution(
name=buf25,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf24', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf23, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone_1}
))
),
size=(2, 116, 28, 28),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 58), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_3}
),
FixedLayout('cuda', torch.float32, size=[2, 58, 28, 28], stride=[90944, 784, 28, 1], offset=45472),
origins={convolution_9}
), InputBuffer(name='primals_28', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_9}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf26', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf25, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_198, i1) * reciprocal(sqrt(load(primals_199, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_29, i1) + load(primals_30, i1)),
ranges=torch.Size([2, 58, 28, 28]),
origins={relu_6}
))
)), TensorBox(StorageBox(
Convolution(
name=buf27,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf26', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf25, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_198, i1) * reciprocal(sqrt(load(primals_199, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_29, i1) + load(primals_30, i1)),
ranges=torch.Size([2, 58, 28, 28]),
origins={relu_6}
)), InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 58),
kwargs={},
output_view=None,
origins={convolution_10}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf28', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf27, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_201, i1) * reciprocal(sqrt(load(primals_202, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_32, i1) + load(primals_33, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_21}
))
)), TensorBox(StorageBox(
Convolution(
name=buf29,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf28', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf27, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_201, i1) * reciprocal(sqrt(load(primals_202, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_32, i1) + load(primals_33, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_21}
)), InputBuffer(name='primals_34', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_11}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf33', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf32, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone_2}
))
),
size=(2, 116, 28, 28),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 58), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_5}
),
size=[2, 58, 28, 28],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 58, i2, i3],
origins={split_2}
)
), TensorBox(StorageBox(
Convolution(
name=buf34,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf33', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf32, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone_2}
))
),
size=(2, 116, 28, 28),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 58), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_5}
),
FixedLayout('cuda', torch.float32, size=[2, 58, 28, 28], stride=[90944, 784, 28, 1], offset=45472),
origins={convolution_12}
), InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_12}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf35', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf34, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_207, i1) * reciprocal(sqrt(load(primals_208, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_38, i1) + load(primals_39, i1)),
ranges=torch.Size([2, 58, 28, 28]),
origins={relu_8}
))
)), TensorBox(StorageBox(
Convolution(
name=buf36,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf35', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf34, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_207, i1) * reciprocal(sqrt(load(primals_208, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_38, i1) + load(primals_39, i1)),
ranges=torch.Size([2, 58, 28, 28]),
origins={relu_8}
)), InputBuffer(name='primals_40', layout=FixedLayout('cuda', torch.float32, size=[58, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 58),
kwargs={},
output_view=None,
origins={convolution_13}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf37', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf36, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_210, i1) * reciprocal(sqrt(load(primals_211, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_41, i1) + load(primals_42, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_27}
))
)), TensorBox(StorageBox(
Convolution(
name=buf38,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]),
inputs=[ComputedBuffer(name='buf37', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 58, 28, 28]), stride=[45472, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf36, i3 + 28 * i2 + 784 * i1 + 45472 * i0) - load(primals_210, i1) * reciprocal(sqrt(load(primals_211, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_41, i1) + load(primals_42, i1),
ranges=torch.Size([2, 58, 28, 28]),
origins={add_27}
)), InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[58, 58, 1, 1], stride=[58, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_14}
)
)), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf42', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf41, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone_3}
))
),
size=(2, 116, 28, 28),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 58), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_7}
)
), TensorBox(StorageBox(
Convolution(
name=buf43,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
StorageBox(
ComputedBuffer(name='buf42', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf41, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone_3}
))
),
FixedLayout('cuda', torch.float32, size=(2, 116, 28, 28), stride=[90944, 784, 28, 1]),
origins={convolution_15}
), InputBuffer(name='primals_46', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_15}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf44', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf43, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_216, i1) * reciprocal(sqrt(load(primals_217, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_47, i1) + load(primals_48, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_31}
))
)), TensorBox(StorageBox(
Convolution(
name=buf45,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf44', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf43, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_216, i1) * reciprocal(sqrt(load(primals_217, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_47, i1) + load(primals_48, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_31}
)), InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_16}
)
)), TensorBox(StorageBox(
Convolution(
name=buf47,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 28, 28]), stride=[90944, 784, 28, 1]),
inputs=[ReinterpretView(
StorageBox(
ComputedBuffer(name='buf42', layout=FixedLayout('cuda', torch.float32, size=[2, 58, 2, 28, 28], stride=[90944, 1568, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf41, i4 + 28 * i3 + 784 * i1 + 45472 * i2 + 90944 * i0),
ranges=[2, 58, 2, 28, 28],
origins={clone_3}
))
),
FixedLayout('cuda', torch.float32, size=(2, 116, 28, 28), stride=[90944, 784, 28, 1]),
origins={convolution_17}
), InputBuffer(name='primals_52', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_17}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf48', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 28, 28]), stride=[90944, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf47, i3 + 28 * i2 + 784 * i1 + 90944 * i0) - load(primals_222, i1) * reciprocal(sqrt(load(primals_223, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_53, i1) + load(primals_54, i1)),
ranges=torch.Size([2, 116, 28, 28]),
origins={relu_11}
))
)), TensorBox(StorageBox(
Convolution(
name=buf49,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf48', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 28, 28]), stride=[90944, 784, 28, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf47, i3 + 28 * i2 + 784 * i1 + 90944 * i0) - load(primals_222, i1) * reciprocal(sqrt(load(primals_223, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_53, i1) + load(primals_54, i1)),
ranges=torch.Size([2, 116, 28, 28]),
origins={relu_11}
)), InputBuffer(name='primals_55', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_18}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf50', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf49, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_225, i1) * reciprocal(sqrt(load(primals_226, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_56, i1) + load(primals_57, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_37}
))
)), TensorBox(StorageBox(
Convolution(
name=buf51,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf50', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf49, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_225, i1) * reciprocal(sqrt(load(primals_226, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_56, i1) + load(primals_57, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_37}
)), InputBuffer(name='primals_58', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_19}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf54', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf53, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_4}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_9}
),
size=[2, 116, 14, 14],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 116, i2, i3],
origins={split_3}
)
), TensorBox(StorageBox(
Convolution(
name=buf55,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf54', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf53, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_4}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_9}
),
FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[45472, 196, 14, 1], offset=22736),
origins={convolution_20}
), InputBuffer(name='primals_61', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_20}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf56', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf55, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_231, i1) * reciprocal(sqrt(load(primals_232, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_62, i1) + load(primals_63, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_13}
))
)), TensorBox(StorageBox(
Convolution(
name=buf57,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf56', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf55, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_231, i1) * reciprocal(sqrt(load(primals_232, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_62, i1) + load(primals_63, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_13}
)), InputBuffer(name='primals_64', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_21}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf58', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf57, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_234, i1) * reciprocal(sqrt(load(primals_235, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_65, i1) + load(primals_66, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_43}
))
)), TensorBox(StorageBox(
Convolution(
name=buf59,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf58', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf57, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_234, i1) * reciprocal(sqrt(load(primals_235, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_65, i1) + load(primals_66, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_43}
)), InputBuffer(name='primals_67', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_22}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf63', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf62, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_5}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_11}
),
size=[2, 116, 14, 14],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 116, i2, i3],
origins={split_4}
)
), TensorBox(StorageBox(
Convolution(
name=buf64,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf63', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf62, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_5}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_11}
),
FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[45472, 196, 14, 1], offset=22736),
origins={convolution_23}
), InputBuffer(name='primals_70', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_23}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf65', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf64, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_240, i1) * reciprocal(sqrt(load(primals_241, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_71, i1) + load(primals_72, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_15}
))
)), TensorBox(StorageBox(
Convolution(
name=buf66,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf65', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf64, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_240, i1) * reciprocal(sqrt(load(primals_241, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_71, i1) + load(primals_72, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_15}
)), InputBuffer(name='primals_73', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_24}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf67', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf66, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_243, i1) * reciprocal(sqrt(load(primals_244, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_74, i1) + load(primals_75, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_49}
))
)), TensorBox(StorageBox(
Convolution(
name=buf68,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf67', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf66, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_243, i1) * reciprocal(sqrt(load(primals_244, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_74, i1) + load(primals_75, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_49}
)), InputBuffer(name='primals_76', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_25}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf72', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf71, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_6}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_13}
),
size=[2, 116, 14, 14],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 116, i2, i3],
origins={split_5}
)
), TensorBox(StorageBox(
Convolution(
name=buf73,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf72', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf71, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_6}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_13}
),
FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[45472, 196, 14, 1], offset=22736),
origins={convolution_26}
), InputBuffer(name='primals_79', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_26}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf74', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf73, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_249, i1) * reciprocal(sqrt(load(primals_250, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_80, i1) + load(primals_81, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_17}
))
)), TensorBox(StorageBox(
Convolution(
name=buf75,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf74', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf73, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_249, i1) * reciprocal(sqrt(load(primals_250, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_80, i1) + load(primals_81, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_17}
)), InputBuffer(name='primals_82', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_27}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf76', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf75, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_252, i1) * reciprocal(sqrt(load(primals_253, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_83, i1) + load(primals_84, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_55}
))
)), TensorBox(StorageBox(
Convolution(
name=buf77,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf76', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf75, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_252, i1) * reciprocal(sqrt(load(primals_253, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_83, i1) + load(primals_84, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_55}
)), InputBuffer(name='primals_85', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_28}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf81', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf80, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_7}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_15}
),
size=[2, 116, 14, 14],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 116, i2, i3],
origins={split_6}
)
), TensorBox(StorageBox(
Convolution(
name=buf82,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf81', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf80, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_7}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_15}
),
FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[45472, 196, 14, 1], offset=22736),
origins={convolution_29}
), InputBuffer(name='primals_88', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_29}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf83', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf82, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_258, i1) * reciprocal(sqrt(load(primals_259, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_89, i1) + load(primals_90, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_19}
))
)), TensorBox(StorageBox(
Convolution(
name=buf84,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf83', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf82, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_258, i1) * reciprocal(sqrt(load(primals_259, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_89, i1) + load(primals_90, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_19}
)), InputBuffer(name='primals_91', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_30}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf85', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf84, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_261, i1) * reciprocal(sqrt(load(primals_262, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_92, i1) + load(primals_93, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_61}
))
)), TensorBox(StorageBox(
Convolution(
name=buf86,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf85', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf84, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_261, i1) * reciprocal(sqrt(load(primals_262, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_92, i1) + load(primals_93, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_61}
)), InputBuffer(name='primals_94', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_31}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf90', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf89, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_8}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_17}
),
size=[2, 116, 14, 14],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 116, i2, i3],
origins={split_7}
)
), TensorBox(StorageBox(
Convolution(
name=buf91,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf90', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf89, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_8}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_17}
),
FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[45472, 196, 14, 1], offset=22736),
origins={convolution_32}
), InputBuffer(name='primals_97', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_32}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf92', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf91, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_267, i1) * reciprocal(sqrt(load(primals_268, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_98, i1) + load(primals_99, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_21}
))
)), TensorBox(StorageBox(
Convolution(
name=buf93,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf92', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf91, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_267, i1) * reciprocal(sqrt(load(primals_268, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_98, i1) + load(primals_99, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_21}
)), InputBuffer(name='primals_100', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_33}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf94', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf93, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_270, i1) * reciprocal(sqrt(load(primals_271, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_101, i1) + load(primals_102, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_67}
))
)), TensorBox(StorageBox(
Convolution(
name=buf95,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf94', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf93, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_270, i1) * reciprocal(sqrt(load(primals_271, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_101, i1) + load(primals_102, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_67}
)), InputBuffer(name='primals_103', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_34}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf99', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf98, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_9}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_19}
),
size=[2, 116, 14, 14],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 116, i2, i3],
origins={split_8}
)
), TensorBox(StorageBox(
Convolution(
name=buf100,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf99', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf98, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_9}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_19}
),
FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[45472, 196, 14, 1], offset=22736),
origins={convolution_35}
), InputBuffer(name='primals_106', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_35}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf101', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf100, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_276, i1) * reciprocal(sqrt(load(primals_277, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_107, i1) + load(primals_108, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_23}
))
)), TensorBox(StorageBox(
Convolution(
name=buf102,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf101', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf100, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_276, i1) * reciprocal(sqrt(load(primals_277, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_107, i1) + load(primals_108, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_23}
)), InputBuffer(name='primals_109', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_36}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf103', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf102, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_279, i1) * reciprocal(sqrt(load(primals_280, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_110, i1) + load(primals_111, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_73}
))
)), TensorBox(StorageBox(
Convolution(
name=buf104,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf103', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf102, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_279, i1) * reciprocal(sqrt(load(primals_280, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_110, i1) + load(primals_111, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_73}
)), InputBuffer(name='primals_112', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_37}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf108', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf107, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_10}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_21}
),
size=[2, 116, 14, 14],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 116, i2, i3],
origins={split_9}
)
), TensorBox(StorageBox(
Convolution(
name=buf109,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf108', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf107, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_10}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_21}
),
FixedLayout('cuda', torch.float32, size=[2, 116, 14, 14], stride=[45472, 196, 14, 1], offset=22736),
origins={convolution_38}
), InputBuffer(name='primals_115', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_38}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf110', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf109, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_285, i1) * reciprocal(sqrt(load(primals_286, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_116, i1) + load(primals_117, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_25}
))
)), TensorBox(StorageBox(
Convolution(
name=buf111,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf110', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf109, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_285, i1) * reciprocal(sqrt(load(primals_286, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_116, i1) + load(primals_117, i1)),
ranges=torch.Size([2, 116, 14, 14]),
origins={relu_25}
)), InputBuffer(name='primals_118', layout=FixedLayout('cuda', torch.float32, size=[116, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 116),
kwargs={},
output_view=None,
origins={convolution_39}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf112', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf111, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_288, i1) * reciprocal(sqrt(load(primals_289, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_119, i1) + load(primals_120, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_79}
))
)), TensorBox(StorageBox(
Convolution(
name=buf113,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]),
inputs=[ComputedBuffer(name='buf112', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 116, 14, 14]), stride=[22736, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf111, i3 + 14 * i2 + 196 * i1 + 22736 * i0) - load(primals_288, i1) * reciprocal(sqrt(load(primals_289, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_119, i1) + load(primals_120, i1),
ranges=torch.Size([2, 116, 14, 14]),
origins={add_79}
)), InputBuffer(name='primals_121', layout=FixedLayout('cuda', torch.float32, size=[116, 116, 1, 1], stride=[116, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_40}
)
)), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf117', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf116, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_11}
))
),
size=(2, 232, 14, 14),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 116), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_23}
)
), TensorBox(StorageBox(
Convolution(
name=buf118,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ReinterpretView(
StorageBox(
ComputedBuffer(name='buf117', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf116, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_11}
))
),
FixedLayout('cuda', torch.float32, size=(2, 232, 14, 14), stride=[45472, 196, 14, 1]),
origins={convolution_41}
), InputBuffer(name='primals_124', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 232),
kwargs={},
output_view=None,
origins={convolution_41}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf119', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf118, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_294, i1) * reciprocal(sqrt(load(primals_295, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_125, i1) + load(primals_126, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_83}
))
)), TensorBox(StorageBox(
Convolution(
name=buf120,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf119', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf118, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_294, i1) * reciprocal(sqrt(load(primals_295, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_125, i1) + load(primals_126, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_83}
)), InputBuffer(name='primals_127', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_42}
)
)), TensorBox(StorageBox(
Convolution(
name=buf122,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 14, 14]), stride=[45472, 196, 14, 1]),
inputs=[ReinterpretView(
StorageBox(
ComputedBuffer(name='buf117', layout=FixedLayout('cuda', torch.float32, size=[2, 116, 2, 14, 14], stride=[45472, 392, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf116, i4 + 14 * i3 + 196 * i1 + 22736 * i2 + 45472 * i0),
ranges=[2, 116, 2, 14, 14],
origins={clone_11}
))
),
FixedLayout('cuda', torch.float32, size=(2, 232, 14, 14), stride=[45472, 196, 14, 1]),
origins={convolution_43}
), InputBuffer(name='primals_130', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_43}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf123', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 14, 14]), stride=[45472, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf122, i3 + 14 * i2 + 196 * i1 + 45472 * i0) - load(primals_300, i1) * reciprocal(sqrt(load(primals_301, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_131, i1) + load(primals_132, i1)),
ranges=torch.Size([2, 232, 14, 14]),
origins={relu_28}
))
)), TensorBox(StorageBox(
Convolution(
name=buf124,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf123', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 14, 14]), stride=[45472, 196, 14, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf122, i3 + 14 * i2 + 196 * i1 + 45472 * i0) - load(primals_300, i1) * reciprocal(sqrt(load(primals_301, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_131, i1) + load(primals_132, i1)),
ranges=torch.Size([2, 232, 14, 14]),
origins={relu_28}
)), InputBuffer(name='primals_133', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (2, 2), (1, 1), (1, 1), False, (0, 0), 232),
kwargs={},
output_view=None,
origins={convolution_44}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf125', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf124, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_303, i1) * reciprocal(sqrt(load(primals_304, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_134, i1) + load(primals_135, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_89}
))
)), TensorBox(StorageBox(
Convolution(
name=buf126,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf125', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf124, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_303, i1) * reciprocal(sqrt(load(primals_304, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_134, i1) + load(primals_135, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_89}
)), InputBuffer(name='primals_136', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_45}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf129', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf128, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_12}
))
),
size=(2, 464, 7, 7),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 232), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_25}
),
size=[2, 232, 7, 7],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 232, i2, i3],
origins={split_10}
)
), TensorBox(StorageBox(
Convolution(
name=buf130,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf129', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf128, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_12}
))
),
size=(2, 464, 7, 7),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 232), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_25}
),
FixedLayout('cuda', torch.float32, size=[2, 232, 7, 7], stride=[22736, 49, 7, 1], offset=11368),
origins={convolution_46}
), InputBuffer(name='primals_139', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_46}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf131', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf130, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_309, i1) * reciprocal(sqrt(load(primals_310, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_140, i1) + load(primals_141, i1)),
ranges=torch.Size([2, 232, 7, 7]),
origins={relu_30}
))
)), TensorBox(StorageBox(
Convolution(
name=buf132,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf131', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf130, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_309, i1) * reciprocal(sqrt(load(primals_310, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_140, i1) + load(primals_141, i1)),
ranges=torch.Size([2, 232, 7, 7]),
origins={relu_30}
)), InputBuffer(name='primals_142', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 232),
kwargs={},
output_view=None,
origins={convolution_47}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf133', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf132, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_312, i1) * reciprocal(sqrt(load(primals_313, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_143, i1) + load(primals_144, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_95}
))
)), TensorBox(StorageBox(
Convolution(
name=buf134,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf133', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf132, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_312, i1) * reciprocal(sqrt(load(primals_313, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_143, i1) + load(primals_144, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_95}
)), InputBuffer(name='primals_145', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_48}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf138', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf137, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_13}
))
),
size=(2, 464, 7, 7),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 232), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_27}
),
size=[2, 232, 7, 7],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 232, i2, i3],
origins={split_11}
)
), TensorBox(StorageBox(
Convolution(
name=buf139,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf138', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf137, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_13}
))
),
size=(2, 464, 7, 7),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 232), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_27}
),
FixedLayout('cuda', torch.float32, size=[2, 232, 7, 7], stride=[22736, 49, 7, 1], offset=11368),
origins={convolution_49}
), InputBuffer(name='primals_148', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_49}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf140', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf139, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_318, i1) * reciprocal(sqrt(load(primals_319, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_149, i1) + load(primals_150, i1)),
ranges=torch.Size([2, 232, 7, 7]),
origins={relu_32}
))
)), TensorBox(StorageBox(
Convolution(
name=buf141,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf140', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf139, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_318, i1) * reciprocal(sqrt(load(primals_319, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_149, i1) + load(primals_150, i1)),
ranges=torch.Size([2, 232, 7, 7]),
origins={relu_32}
)), InputBuffer(name='primals_151', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 232),
kwargs={},
output_view=None,
origins={convolution_50}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf142', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf141, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_321, i1) * reciprocal(sqrt(load(primals_322, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_152, i1) + load(primals_153, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_101}
))
)), TensorBox(StorageBox(
Convolution(
name=buf143,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf142', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf141, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_321, i1) * reciprocal(sqrt(load(primals_322, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_152, i1) + load(primals_153, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_101}
)), InputBuffer(name='primals_154', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_51}
)
)), TensorBox(
SliceView(
View(
StorageBox(
ComputedBuffer(name='buf147', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf146, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_14}
))
),
size=(2, 464, 7, 7),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 232), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_29}
),
size=[2, 232, 7, 7],
reindex=lambda i0, i1, i2, i3: [i0, i1 + 232, i2, i3],
origins={split_12}
)
), TensorBox(StorageBox(
Convolution(
name=buf148,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ReinterpretView(
View(
StorageBox(
ComputedBuffer(name='buf147', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf146, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_14}
))
),
size=(2, 464, 7, 7),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 232), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_29}
),
FixedLayout('cuda', torch.float32, size=[2, 232, 7, 7], stride=[22736, 49, 7, 1], offset=11368),
origins={convolution_52}
), InputBuffer(name='primals_157', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_52}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf149', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf148, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_327, i1) * reciprocal(sqrt(load(primals_328, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_158, i1) + load(primals_159, i1)),
ranges=torch.Size([2, 232, 7, 7]),
origins={relu_34}
))
)), TensorBox(StorageBox(
Convolution(
name=buf150,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf149', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf148, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_327, i1) * reciprocal(sqrt(load(primals_328, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_158, i1) + load(primals_159, i1)),
ranges=torch.Size([2, 232, 7, 7]),
origins={relu_34}
)), InputBuffer(name='primals_160', layout=FixedLayout('cuda', torch.float32, size=[232, 1, 3, 3], stride=[9, 9, 3, 1]))],
constant_args=(None, (1, 1), (1, 1), (1, 1), False, (0, 0), 232),
kwargs={},
output_view=None,
origins={convolution_53}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf151', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf150, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_330, i1) * reciprocal(sqrt(load(primals_331, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_161, i1) + load(primals_162, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_107}
))
)), TensorBox(StorageBox(
Convolution(
name=buf152,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]),
inputs=[ComputedBuffer(name='buf151', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 232, 7, 7]), stride=[11368, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf150, i3 + 7 * i2 + 49 * i1 + 11368 * i0) - load(primals_330, i1) * reciprocal(sqrt(load(primals_331, i1) + constant(1e-05, torch.float32))) * constant(1, torch.float32) * load(primals_161, i1) + load(primals_162, i1),
ranges=torch.Size([2, 232, 7, 7]),
origins={add_107}
)), InputBuffer(name='primals_163', layout=FixedLayout('cuda', torch.float32, size=[232, 232, 1, 1], stride=[232, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_54}
)
)), TensorBox(
View(
StorageBox(
ComputedBuffer(name='buf156', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf155, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_15}
))
),
size=(2, 464, 7, 7),
reindex=lambda i0, i1, i2, i3: [i0, ModularIndexing(i1, 2, 232), ModularIndexing(i1, 1, 2), i2, i3],
origins={view_31}
)
), TensorBox(StorageBox(
Convolution(
name=buf157,
layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 1024, 7, 7]), stride=[50176, 49, 7, 1]),
inputs=[ReinterpretView(
StorageBox(
ComputedBuffer(name='buf156', layout=FixedLayout('cuda', torch.float32, size=[2, 232, 2, 7, 7], stride=[22736, 98, 49, 7, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf155, i4 + 7 * i3 + 49 * i1 + 11368 * i2 + 22736 * i0),
ranges=[2, 232, 2, 7, 7],
origins={clone_15}
))
),
FixedLayout('cuda', torch.float32, size=(2, 464, 7, 7), stride=[22736, 49, 7, 1]),
origins={convolution_55}
), InputBuffer(name='primals_166', layout=FixedLayout('cuda', torch.float32, size=[1024, 464, 1, 1], stride=[464, 1, 1, 1]))],
constant_args=(None, (1, 1), (0, 0), (1, 1), False, (0, 0), 1),
kwargs={},
output_view=None,
origins={convolution_55}
)
)), TensorBox(StorageBox(
ComputedBuffer(name='buf160', layout=FlexibleLayout('cuda', torch.float32, size=[2, 1024], stride=[1024, 1]), data=Pointwise(
'cuda',
torch.float32,
load(buf159, i1 + 1024 * i0) / index_expr(49, torch.float32),
ranges=[2, 1024],
origins={mean}
))
)), TensorBox(
ReinterpretView(
StorageBox(
InputBuffer(name='primals_169', layout=FixedLayout('cuda', torch.float32, size=[1000, 1024], stride=[1024, 1]))
),
FixedLayout('cuda', torch.float32, size=[1000, 1024], stride=[1024, 1]),
origins={permute_17}
)
), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf158, i3 + 7 * i2 + 49 * i1 + 50176 * i0) <= constant(0, torch.float32),
ranges=torch.Size([2, 1024, 7, 7]),
origins={le}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf153, i3 + 7 * i2 + 49 * i1 + 22736 * i0) <= constant(0, torch.float32),
ranges=[2, 232, 7, 7],
origins={le_1}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf144, i3 + 7 * i2 + 49 * i1 + 22736 * i0) <= constant(0, torch.float32),
ranges=[2, 232, 7, 7],
origins={le_3}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf135, i3 + 7 * i2 + 49 * i1 + 22736 * i0) <= constant(0, torch.float32),
ranges=[2, 232, 7, 7],
origins={le_5}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf127, i3 + 7 * i2 + 49 * i1 + 22736 * i0) <= constant(0, torch.float32),
ranges=[2, 232, 7, 7],
origins={le_7}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf121, i3 + 7 * i2 + 49 * i1 + 22736 * i0) <= constant(0, torch.float32),
ranges=[2, 232, 7, 7],
origins={le_9}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf114, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_10}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf105, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_12}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf96, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_14}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf87, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_16}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf78, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_18}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf69, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_20}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf60, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_22}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf52, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_24}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf46, i3 + 14 * i2 + 196 * i1 + 45472 * i0) <= constant(0, torch.float32),
ranges=[2, 116, 14, 14],
origins={le_26}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf39, i3 + 28 * i2 + 784 * i1 + 90944 * i0) <= constant(0, torch.float32),
ranges=[2, 58, 28, 28],
origins={le_27}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf30, i3 + 28 * i2 + 784 * i1 + 90944 * i0) <= constant(0, torch.float32),
ranges=[2, 58, 28, 28],
origins={le_29}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf21, i3 + 28 * i2 + 784 * i1 + 90944 * i0) <= constant(0, torch.float32),
ranges=[2, 58, 28, 28],
origins={le_31}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf13, i3 + 28 * i2 + 784 * i1 + 90944 * i0) <= constant(0, torch.float32),
ranges=[2, 58, 28, 28],
origins={le_33}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf7, i3 + 28 * i2 + 784 * i1 + 90944 * i0) <= constant(0, torch.float32),
ranges=[2, 58, 28, 28],
origins={le_35}
)
)), s0, 28, 28, 14, 14, 7, 7]
While executing return [addmm, primals_1, primals_2, primals_4, primals_5, primals_7, primals_8, primals_10, primals_11, primals_13, primals_14, primals_16, primals_17, primals_19, primals_20, primals_22, primals_23, primals_25, primals_26, primals_28, primals_29, primals_31, primals_32, primals_34, primals_35, primals_37, primals_38, primals_40, primals_41, primals_43, primals_44, primals_46, primals_47, primals_49, primals_50, primals_52, primals_53, primals_55, primals_56, primals_58, primals_59, primals_61, primals_62, primals_64, primals_65, primals_67, primals_68, primals_70, primals_71, primals_73, primals_74, primals_76, primals_77, primals_79, primals_80, primals_82, primals_83, primals_85, primals_86, primals_88, primals_89, primals_91, primals_92, primals_94, primals_95, primals_97, primals_98, primals_100, primals_101, primals_103, primals_104, primals_106, primals_107, primals_109, primals_110, primals_112, primals_113, primals_115, primals_116, primals_118, primals_119, primals_121, primals_122, primals_124, primals_125, primals_127, primals_128, primals_130, primals_131, primals_133, primals_134, primals_136, primals_137, primals_139, primals_140, primals_142, primals_143, primals_145, primals_146, primals_148, primals_149, primals_151, primals_152, primals_154, primals_155, primals_157, primals_158, primals_160, primals_161, primals_163, primals_164, primals_166, primals_167, primals_171, primals_172, primals_174, primals_175, primals_177, primals_178, primals_180, primals_181, primals_183, primals_184, primals_186, primals_187, primals_189, primals_190, primals_192, primals_193, primals_195, primals_196, primals_198, primals_199, primals_201, primals_202, primals_204, primals_205, primals_207, primals_208, primals_210, primals_211, primals_213, primals_214, primals_216, primals_217, primals_219, primals_220, primals_222, primals_223, primals_225, primals_226, primals_228, primals_229, primals_231, primals_232, primals_234, primals_235, primals_237, primals_238, primals_240, primals_241, primals_243, primals_244, primals_246, primals_247, primals_249, primals_250, primals_252, primals_253, primals_255, primals_256, primals_258, primals_259, primals_261, primals_262, primals_264, primals_265, primals_267, primals_268, primals_270, primals_271, primals_273, primals_274, primals_276, primals_277, primals_279, primals_280, primals_282, primals_283, primals_285, primals_286, primals_288, primals_289, primals_291, primals_292, primals_294, primals_295, primals_297, primals_298, primals_300, primals_301, primals_303, primals_304, primals_306, primals_307, primals_309, primals_310, primals_312, primals_313, primals_315, primals_316, primals_318, primals_319, primals_321, primals_322, primals_324, primals_325, primals_327, primals_328, primals_330, primals_331, primals_333, primals_334, primals_336, primals_337, primals_339, convolution, relu, getitem, getitem_1, convolution_1, add_3, convolution_2, convolution_3, relu_2, convolution_4, add_9, convolution_5, getitem_3, convolution_6, relu_4, convolution_7, add_15, convolution_8, getitem_5, convolution_9, relu_6, convolution_10, add_21, convolution_11, getitem_7, convolution_12, relu_8, convolution_13, add_27, convolution_14, view_7, convolution_15, add_31, convolution_16, convolution_17, relu_11, convolution_18, add_37, convolution_19, getitem_9, convolution_20, relu_13, convolution_21, add_43, convolution_22, getitem_11, convolution_23, relu_15, convolution_24, add_49, convolution_25, getitem_13, convolution_26, relu_17, convolution_27, add_55, convolution_28, getitem_15, convolution_29, relu_19, convolution_30, add_61, convolution_31, getitem_17, convolution_32, relu_21, convolution_33, add_67, convolution_34, getitem_19, convolution_35, relu_23, convolution_36, add_73, convolution_37, getitem_21, convolution_38, relu_25, convolution_39, add_79, convolution_40, view_23, convolution_41, add_83, convolution_42, convolution_43, relu_28, convolution_44, add_89, convolution_45, getitem_23, convolution_46, relu_30, convolution_47, add_95, convolution_48, getitem_25, convolution_49, relu_32, convolution_50, add_101, convolution_51, getitem_27, convolution_52, relu_34, convolution_53, add_107, convolution_54, view_31, convolution_55, mean, permute_17, le, le_1, le_3, le_5, le_7, le_9, le_10, le_12, le_14, le_16, le_18, le_20, le_22, le_24, le_26, le_27, le_29, le_31, le_33, le_35, sym_size, sym_size_1, sym_size_2, sym_size_3, sym_size_4, sym_size_5, sym_size_6]
Original traceback:
None
TorchDynamo optimized model failed to run because of following error
cuda train shufflenet_v2_x1_0 FAIL
/scratch/ezyang/work/env/lib/python3.9/site-packages/gym/core.py:317: DeprecationWarning: WARN: Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.
deprecation(
/scratch/ezyang/work/env/lib/python3.9/site-packages/gym/wrappers/step_api_compatibility.py:39: DeprecationWarning: WARN: Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.
deprecation(
Running torchbench.py soft_actor_critic...
cuda train soft_actor_critic PASS
Running torchbench.py speech_transformer...
ERROR:common:
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/speech_transformer/speech_transformer/transformer/transformer.py", line 28, in forward
encoder_padded_outputs, *_ = self.encoder(padded_input, input_lengths)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/speech_transformer/speech_transformer/transformer/encoder.py", line 48, in forward
non_pad_mask = get_non_pad_mask(padded_input, input_lengths=input_lengths)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/speech_transformer/speech_transformer/transformer/encoder.py", line 50, in <graph break in forward>
slf_attn_mask = get_attn_pad_mask(padded_input, input_lengths, length)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/speech_transformer/speech_transformer/transformer/encoder.py", line 55, in <graph break in forward>
self.positional_encoding(padded_input))
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/speech_transformer/speech_transformer/transformer/encoder.py", line 55, in <graph break in forward>
self.positional_encoding(padded_input))
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 516, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 351, in fw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 122, in compile_fx_inner
compiled_fn = graph.compile_to_fn()
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 349, in compile_to_fn
return self.compile_to_module().call
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 335, in compile_to_module
code = self.codegen()
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 326, in codegen
self.wrapper_code = WrapperCodeGen()
File "/scratch/ezyang/work/pytorch/torch/_inductor/codegen/wrapper.py", line 240, in __init__
V.graph.sizevars.codegen(self.prefix, V.graph.graph_inputs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/sizevars.py", line 481, in codegen
assert not needed
AssertionError
TorchDynamo optimized model failed to run because of following error
cuda train speech_transformer FAIL
Running torchbench.py squeezenet1_1...
ERROR:common:[TensorBox(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf65, i1 + 1000 * i0) / index_expr(169, torch.float32),
ranges=[2, 1000, 1, 1],
origins={mean}
)
),
size=(2, 1000),
reindex=lambda i0, i1: [i0, i1, 0, 0],
origins={view}
)
), TensorBox(StorageBox(
InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[64, 3, 3, 3], stride=[27, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_3', layout=FixedLayout('cuda', torch.float32, size=[16, 64, 1, 1], stride=[64, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_5', layout=FixedLayout('cuda', torch.float32, size=[64, 16, 1, 1], stride=[16, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[64, 16, 3, 3], stride=[144, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_9', layout=FixedLayout('cuda', torch.float32, size=[16, 128, 1, 1], stride=[128, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_11', layout=FixedLayout('cuda', torch.float32, size=[64, 16, 1, 1], stride=[16, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[64, 16, 3, 3], stride=[144, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_15', layout=FixedLayout('cuda', torch.float32, size=[32, 128, 1, 1], stride=[128, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_17', layout=FixedLayout('cuda', torch.float32, size=[128, 32, 1, 1], stride=[32, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[128, 32, 3, 3], stride=[288, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_21', layout=FixedLayout('cuda', torch.float32, size=[32, 256, 1, 1], stride=[256, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_23', layout=FixedLayout('cuda', torch.float32, size=[128, 32, 1, 1], stride=[32, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[128, 32, 3, 3], stride=[288, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_27', layout=FixedLayout('cuda', torch.float32, size=[48, 256, 1, 1], stride=[256, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_29', layout=FixedLayout('cuda', torch.float32, size=[192, 48, 1, 1], stride=[48, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[192, 48, 3, 3], stride=[432, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_33', layout=FixedLayout('cuda', torch.float32, size=[48, 384, 1, 1], stride=[384, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_35', layout=FixedLayout('cuda', torch.float32, size=[192, 48, 1, 1], stride=[48, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[192, 48, 3, 3], stride=[432, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_39', layout=FixedLayout('cuda', torch.float32, size=[64, 384, 1, 1], stride=[384, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_41', layout=FixedLayout('cuda', torch.float32, size=[256, 64, 1, 1], stride=[64, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[256, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_45', layout=FixedLayout('cuda', torch.float32, size=[64, 512, 1, 1], stride=[512, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_47', layout=FixedLayout('cuda', torch.float32, size=[256, 64, 1, 1], stride=[64, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[256, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_51', layout=FixedLayout('cuda', torch.float32, size=[1000, 512, 1, 1], stride=[512, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_53', layout=FixedLayout('cuda', torch.float32, size=[s0, 3, s2, s2], stride=[3*s2**2, s2**2, s2, 1]))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf1', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 64, 111, 111]), stride=[788544, 12321, 111, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf0, i3 + 111 * i2 + 12321 * i1 + 788544 * i0) + load(primals_2, i1)),
ranges=torch.Size([2, 64, 111, 111]),
origins={relu}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[2, 64, 55, 55], stride=[193600, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.float32,
maximum(load(buf1, 224 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 223 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 222 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 113 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0))))))))),
ranges=[2, 64, 55, 55],
origins={max_pool2d_with_indices}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf3', layout=FlexibleLayout('cuda', torch.int64, size=[2, 64, 55, 55], stride=[193600, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.int64,
where(load(buf1, 224 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 223 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 222 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 113 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0)))))))), index_expr(224 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 223 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 222 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 113 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0))))))), index_expr(223 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 222 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 113 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0)))))), index_expr(222 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 113 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0))))), index_expr(113 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0)))), index_expr(112 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0))), index_expr(111 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0)), index_expr(2 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), index_expr(1 + 2 * i3 + 222 * i2, torch.int64), index_expr(2 * i3 + 222 * i2, torch.int64))))))))),
ranges=[2, 64, 55, 55],
origins={max_pool2d_with_indices}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf5', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 16, 55, 55]), stride=[48400, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf4, i3 + 55 * i2 + 3025 * i1 + 48400 * i0) + load(primals_4, i1)),
ranges=torch.Size([2, 16, 55, 55]),
origins={relu_1}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf10', layout=FixedLayout('cuda', torch.float32, size=[2, 128, 55, 55], stride=[387200, 3025, 55, 1]), inputs=[ComputedBuffer(name='buf8', layout=AliasedLayout('cuda', torch.float32, size=[2, 64, 55, 55], stride=[387200, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf6, i3 + 55 * i2 + 3025 * i1 + 193600 * i0) + load(primals_6, i1)),
ranges=torch.Size([2, 64, 55, 55]),
origins={relu_2}
)), ComputedBuffer(name='buf9', layout=AliasedLayout('cuda', torch.float32, size=[2, 64, 55, 55], stride=[387200, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf7, i3 + 55 * i2 + 3025 * i1 + 193600 * i0) + load(primals_8, i1)),
ranges=torch.Size([2, 64, 55, 55]),
origins={relu_3}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf12', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 16, 55, 55]), stride=[48400, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf11, i3 + 55 * i2 + 3025 * i1 + 48400 * i0) + load(primals_10, i1)),
ranges=torch.Size([2, 16, 55, 55]),
origins={relu_4}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf17', layout=FixedLayout('cuda', torch.float32, size=[2, 128, 55, 55], stride=[387200, 3025, 55, 1]), inputs=[ComputedBuffer(name='buf15', layout=AliasedLayout('cuda', torch.float32, size=[2, 64, 55, 55], stride=[387200, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf13, i3 + 55 * i2 + 3025 * i1 + 193600 * i0) + load(primals_12, i1)),
ranges=torch.Size([2, 64, 55, 55]),
origins={relu_5}
)), ComputedBuffer(name='buf16', layout=AliasedLayout('cuda', torch.float32, size=[2, 64, 55, 55], stride=[387200, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf14, i3 + 55 * i2 + 3025 * i1 + 193600 * i0) + load(primals_14, i1)),
ranges=torch.Size([2, 64, 55, 55]),
origins={relu_6}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf18', layout=FixedLayout('cuda', torch.float32, size=[2, 128, 27, 27], stride=[93312, 729, 27, 1]), data=Pointwise(
'cuda',
torch.float32,
maximum(load(buf17, 112 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 111 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 110 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 57 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0))))))))),
ranges=[2, 128, 27, 27],
origins={max_pool2d_with_indices_1}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf19', layout=FlexibleLayout('cuda', torch.int64, size=[2, 128, 27, 27], stride=[93312, 729, 27, 1]), data=Pointwise(
'cuda',
torch.int64,
where(load(buf17, 112 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 111 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 110 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 57 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0)))))))), index_expr(112 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 111 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 110 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 57 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0))))))), index_expr(111 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 110 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 57 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0)))))), index_expr(110 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 57 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0))))), index_expr(57 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0)))), index_expr(56 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0))), index_expr(55 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0)), index_expr(2 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), index_expr(1 + 2 * i3 + 110 * i2, torch.int64), index_expr(2 * i3 + 110 * i2, torch.int64))))))))),
ranges=[2, 128, 27, 27],
origins={max_pool2d_with_indices_1}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf21', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 32, 27, 27]), stride=[23328, 729, 27, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf20, i3 + 27 * i2 + 729 * i1 + 23328 * i0) + load(primals_16, i1)),
ranges=torch.Size([2, 32, 27, 27]),
origins={relu_7}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf26', layout=FixedLayout('cuda', torch.float32, size=[2, 256, 27, 27], stride=[186624, 729, 27, 1]), inputs=[ComputedBuffer(name='buf24', layout=AliasedLayout('cuda', torch.float32, size=[2, 128, 27, 27], stride=[186624, 729, 27, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf22, i3 + 27 * i2 + 729 * i1 + 93312 * i0) + load(primals_18, i1)),
ranges=torch.Size([2, 128, 27, 27]),
origins={relu_8}
)), ComputedBuffer(name='buf25', layout=AliasedLayout('cuda', torch.float32, size=[2, 128, 27, 27], stride=[186624, 729, 27, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf23, i3 + 27 * i2 + 729 * i1 + 93312 * i0) + load(primals_20, i1)),
ranges=torch.Size([2, 128, 27, 27]),
origins={relu_9}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf28', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 32, 27, 27]), stride=[23328, 729, 27, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf27, i3 + 27 * i2 + 729 * i1 + 23328 * i0) + load(primals_22, i1)),
ranges=torch.Size([2, 32, 27, 27]),
origins={relu_10}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf33', layout=FixedLayout('cuda', torch.float32, size=[2, 256, 27, 27], stride=[186624, 729, 27, 1]), inputs=[ComputedBuffer(name='buf31', layout=AliasedLayout('cuda', torch.float32, size=[2, 128, 27, 27], stride=[186624, 729, 27, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf29, i3 + 27 * i2 + 729 * i1 + 93312 * i0) + load(primals_24, i1)),
ranges=torch.Size([2, 128, 27, 27]),
origins={relu_11}
)), ComputedBuffer(name='buf32', layout=AliasedLayout('cuda', torch.float32, size=[2, 128, 27, 27], stride=[186624, 729, 27, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf30, i3 + 27 * i2 + 729 * i1 + 93312 * i0) + load(primals_26, i1)),
ranges=torch.Size([2, 128, 27, 27]),
origins={relu_12}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf34', layout=FixedLayout('cuda', torch.float32, size=[2, 256, 13, 13], stride=[43264, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
maximum(load(buf33, 56 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 55 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 54 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 29 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0))))))))),
ranges=[2, 256, 13, 13],
origins={max_pool2d_with_indices_2}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf35', layout=FlexibleLayout('cuda', torch.int64, size=[2, 256, 13, 13], stride=[43264, 169, 13, 1]), data=Pointwise(
'cuda',
torch.int64,
where(load(buf33, 56 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 55 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 54 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 29 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0)))))))), index_expr(56 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 55 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 54 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 29 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0))))))), index_expr(55 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 54 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 29 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0)))))), index_expr(54 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 29 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0))))), index_expr(29 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0)))), index_expr(28 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0))), index_expr(27 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0)), index_expr(2 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), index_expr(1 + 2 * i3 + 54 * i2, torch.int64), index_expr(2 * i3 + 54 * i2, torch.int64))))))))),
ranges=[2, 256, 13, 13],
origins={max_pool2d_with_indices_2}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf37', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 48, 13, 13]), stride=[8112, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf36, i3 + 13 * i2 + 169 * i1 + 8112 * i0) + load(primals_28, i1)),
ranges=torch.Size([2, 48, 13, 13]),
origins={relu_13}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf42', layout=FixedLayout('cuda', torch.float32, size=[2, 384, 13, 13], stride=[64896, 169, 13, 1]), inputs=[ComputedBuffer(name='buf40', layout=AliasedLayout('cuda', torch.float32, size=[2, 192, 13, 13], stride=[64896, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf38, i3 + 13 * i2 + 169 * i1 + 32448 * i0) + load(primals_30, i1)),
ranges=torch.Size([2, 192, 13, 13]),
origins={relu_14}
)), ComputedBuffer(name='buf41', layout=AliasedLayout('cuda', torch.float32, size=[2, 192, 13, 13], stride=[64896, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf39, i3 + 13 * i2 + 169 * i1 + 32448 * i0) + load(primals_32, i1)),
ranges=torch.Size([2, 192, 13, 13]),
origins={relu_15}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf44', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 48, 13, 13]), stride=[8112, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf43, i3 + 13 * i2 + 169 * i1 + 8112 * i0) + load(primals_34, i1)),
ranges=torch.Size([2, 48, 13, 13]),
origins={relu_16}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf49', layout=FixedLayout('cuda', torch.float32, size=[2, 384, 13, 13], stride=[64896, 169, 13, 1]), inputs=[ComputedBuffer(name='buf47', layout=AliasedLayout('cuda', torch.float32, size=[2, 192, 13, 13], stride=[64896, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf45, i3 + 13 * i2 + 169 * i1 + 32448 * i0) + load(primals_36, i1)),
ranges=torch.Size([2, 192, 13, 13]),
origins={relu_17}
)), ComputedBuffer(name='buf48', layout=AliasedLayout('cuda', torch.float32, size=[2, 192, 13, 13], stride=[64896, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf46, i3 + 13 * i2 + 169 * i1 + 32448 * i0) + load(primals_38, i1)),
ranges=torch.Size([2, 192, 13, 13]),
origins={relu_18}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf51', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 64, 13, 13]), stride=[10816, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf50, i3 + 13 * i2 + 169 * i1 + 10816 * i0) + load(primals_40, i1)),
ranges=torch.Size([2, 64, 13, 13]),
origins={relu_19}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf56', layout=FixedLayout('cuda', torch.float32, size=[2, 512, 13, 13], stride=[86528, 169, 13, 1]), inputs=[ComputedBuffer(name='buf54', layout=AliasedLayout('cuda', torch.float32, size=[2, 256, 13, 13], stride=[86528, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf52, i3 + 13 * i2 + 169 * i1 + 43264 * i0) + load(primals_42, i1)),
ranges=torch.Size([2, 256, 13, 13]),
origins={relu_20}
)), ComputedBuffer(name='buf55', layout=AliasedLayout('cuda', torch.float32, size=[2, 256, 13, 13], stride=[86528, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf53, i3 + 13 * i2 + 169 * i1 + 43264 * i0) + load(primals_44, i1)),
ranges=torch.Size([2, 256, 13, 13]),
origins={relu_21}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf58', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 64, 13, 13]), stride=[10816, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf57, i3 + 13 * i2 + 169 * i1 + 10816 * i0) + load(primals_46, i1)),
ranges=torch.Size([2, 64, 13, 13]),
origins={relu_22}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf63', layout=FixedLayout('cuda', torch.float32, size=[2, 512, 13, 13], stride=[86528, 169, 13, 1]), inputs=[ComputedBuffer(name='buf61', layout=AliasedLayout('cuda', torch.float32, size=[2, 256, 13, 13], stride=[86528, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf59, i3 + 13 * i2 + 169 * i1 + 43264 * i0) + load(primals_48, i1)),
ranges=torch.Size([2, 256, 13, 13]),
origins={relu_23}
)), ComputedBuffer(name='buf62', layout=AliasedLayout('cuda', torch.float32, size=[2, 256, 13, 13], stride=[86528, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf60, i3 + 13 * i2 + 169 * i1 + 43264 * i0) + load(primals_50, i1)),
ranges=torch.Size([2, 256, 13, 13]),
origins={relu_24}
))])
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
relu(load(buf64, i3 + 13 * i2 + 169 * i1 + 169000 * i0) + load(primals_52, i1)) <= constant(0, torch.float32),
ranges=torch.Size([2, 1000, 13, 13]),
origins={le}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf62, i3 + 13 * i2 + 169 * i1 + 86528 * i0) <= constant(0, torch.float32),
ranges=[2, 256, 13, 13],
origins={le_1}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf61, i3 + 13 * i2 + 169 * i1 + 86528 * i0) <= constant(0, torch.float32),
ranges=[2, 256, 13, 13],
origins={le_2}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf55, i3 + 13 * i2 + 169 * i1 + 86528 * i0) <= constant(0, torch.float32),
ranges=[2, 256, 13, 13],
origins={le_4}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf54, i3 + 13 * i2 + 169 * i1 + 86528 * i0) <= constant(0, torch.float32),
ranges=[2, 256, 13, 13],
origins={le_5}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf48, i3 + 13 * i2 + 169 * i1 + 64896 * i0) <= constant(0, torch.float32),
ranges=[2, 192, 13, 13],
origins={le_7}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf47, i3 + 13 * i2 + 169 * i1 + 64896 * i0) <= constant(0, torch.float32),
ranges=[2, 192, 13, 13],
origins={le_8}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf41, i3 + 13 * i2 + 169 * i1 + 64896 * i0) <= constant(0, torch.float32),
ranges=[2, 192, 13, 13],
origins={le_10}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf40, i3 + 13 * i2 + 169 * i1 + 64896 * i0) <= constant(0, torch.float32),
ranges=[2, 192, 13, 13],
origins={le_11}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf32, i3 + 27 * i2 + 729 * i1 + 186624 * i0) <= constant(0, torch.float32),
ranges=[2, 128, 27, 27],
origins={le_13}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf31, i3 + 27 * i2 + 729 * i1 + 186624 * i0) <= constant(0, torch.float32),
ranges=[2, 128, 27, 27],
origins={le_14}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf25, i3 + 27 * i2 + 729 * i1 + 186624 * i0) <= constant(0, torch.float32),
ranges=[2, 128, 27, 27],
origins={le_16}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf24, i3 + 27 * i2 + 729 * i1 + 186624 * i0) <= constant(0, torch.float32),
ranges=[2, 128, 27, 27],
origins={le_17}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf16, i3 + 55 * i2 + 3025 * i1 + 387200 * i0) <= constant(0, torch.float32),
ranges=[2, 64, 55, 55],
origins={le_19}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf15, i3 + 55 * i2 + 3025 * i1 + 387200 * i0) <= constant(0, torch.float32),
ranges=[2, 64, 55, 55],
origins={le_20}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf9, i3 + 55 * i2 + 3025 * i1 + 387200 * i0) <= constant(0, torch.float32),
ranges=[2, 64, 55, 55],
origins={le_22}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf8, i3 + 55 * i2 + 3025 * i1 + 387200 * i0) <= constant(0, torch.float32),
ranges=[2, 64, 55, 55],
origins={le_23}
)
)), s0, 13, 13]
While executing return [view, primals_1, primals_3, primals_5, primals_7, primals_9, primals_11, primals_13, primals_15, primals_17, primals_19, primals_21, primals_23, primals_25, primals_27, primals_29, primals_31, primals_33, primals_35, primals_37, primals_39, primals_41, primals_43, primals_45, primals_47, primals_49, primals_51, primals_53, relu, getitem, getitem_1, relu_1, cat, relu_4, cat_1, getitem_2, getitem_3, relu_7, cat_2, relu_10, cat_3, getitem_4, getitem_5, relu_13, cat_4, relu_16, cat_5, relu_19, cat_6, relu_22, cat_7, le, le_1, le_2, le_4, le_5, le_7, le_8, le_10, le_11, le_13, le_14, le_16, le_17, le_19, le_20, le_22, le_23, sym_size, sym_size_1, sym_size_2]
Original traceback:
None
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchvision/torchvision/models/squeezenet.py", line 94, in forward
def forward(self, x: torch.Tensor) -> torch.Tensor:
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 516, in aot_dispatch_autograd
compiled_fw_func = aot_config.fw_compiler(fw_module, deduped_flat_args)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 351, in fw_compiler
return compile_fx_inner(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/debug_utils.py", line 444, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/scratch/ezyang/work/env/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/scratch/ezyang/work/pytorch/torch/_inductor/compile_fx.py", line 121, in compile_fx_inner
graph.run(*example_inputs)
File "/scratch/ezyang/work/pytorch/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 129, in run
return super().run(*args)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 130, in run
self.env[node] = self.run_node(node)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 299, in run_node
result = super().run_node(n)
File "/scratch/ezyang/work/pytorch/torch/fx/interpreter.py", line 171, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/scratch/ezyang/work/pytorch/torch/_inductor/graph.py", line 267, in output
assert all(
AssertionError: [TensorBox(
View(
StorageBox(
Pointwise(
'cuda',
torch.float32,
load(buf65, i1 + 1000 * i0) / index_expr(169, torch.float32),
ranges=[2, 1000, 1, 1],
origins={mean}
)
),
size=(2, 1000),
reindex=lambda i0, i1: [i0, i1, 0, 0],
origins={view}
)
), TensorBox(StorageBox(
InputBuffer(name='primals_1', layout=FixedLayout('cuda', torch.float32, size=[64, 3, 3, 3], stride=[27, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_3', layout=FixedLayout('cuda', torch.float32, size=[16, 64, 1, 1], stride=[64, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_5', layout=FixedLayout('cuda', torch.float32, size=[64, 16, 1, 1], stride=[16, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_7', layout=FixedLayout('cuda', torch.float32, size=[64, 16, 3, 3], stride=[144, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_9', layout=FixedLayout('cuda', torch.float32, size=[16, 128, 1, 1], stride=[128, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_11', layout=FixedLayout('cuda', torch.float32, size=[64, 16, 1, 1], stride=[16, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_13', layout=FixedLayout('cuda', torch.float32, size=[64, 16, 3, 3], stride=[144, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_15', layout=FixedLayout('cuda', torch.float32, size=[32, 128, 1, 1], stride=[128, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_17', layout=FixedLayout('cuda', torch.float32, size=[128, 32, 1, 1], stride=[32, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_19', layout=FixedLayout('cuda', torch.float32, size=[128, 32, 3, 3], stride=[288, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_21', layout=FixedLayout('cuda', torch.float32, size=[32, 256, 1, 1], stride=[256, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_23', layout=FixedLayout('cuda', torch.float32, size=[128, 32, 1, 1], stride=[32, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_25', layout=FixedLayout('cuda', torch.float32, size=[128, 32, 3, 3], stride=[288, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_27', layout=FixedLayout('cuda', torch.float32, size=[48, 256, 1, 1], stride=[256, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_29', layout=FixedLayout('cuda', torch.float32, size=[192, 48, 1, 1], stride=[48, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_31', layout=FixedLayout('cuda', torch.float32, size=[192, 48, 3, 3], stride=[432, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_33', layout=FixedLayout('cuda', torch.float32, size=[48, 384, 1, 1], stride=[384, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_35', layout=FixedLayout('cuda', torch.float32, size=[192, 48, 1, 1], stride=[48, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_37', layout=FixedLayout('cuda', torch.float32, size=[192, 48, 3, 3], stride=[432, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_39', layout=FixedLayout('cuda', torch.float32, size=[64, 384, 1, 1], stride=[384, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_41', layout=FixedLayout('cuda', torch.float32, size=[256, 64, 1, 1], stride=[64, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_43', layout=FixedLayout('cuda', torch.float32, size=[256, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_45', layout=FixedLayout('cuda', torch.float32, size=[64, 512, 1, 1], stride=[512, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_47', layout=FixedLayout('cuda', torch.float32, size=[256, 64, 1, 1], stride=[64, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_49', layout=FixedLayout('cuda', torch.float32, size=[256, 64, 3, 3], stride=[576, 9, 3, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_51', layout=FixedLayout('cuda', torch.float32, size=[1000, 512, 1, 1], stride=[512, 1, 1, 1]))
)), TensorBox(StorageBox(
InputBuffer(name='primals_53', layout=FixedLayout('cuda', torch.float32, size=[s0, 3, s2, s2], stride=[3*s2**2, s2**2, s2, 1]))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf1', layout=FlexibleLayout('cuda', torch.float32, size=torch.Size([2, 64, 111, 111]), stride=[788544, 12321, 111, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf0, i3 + 111 * i2 + 12321 * i1 + 788544 * i0) + load(primals_2, i1)),
ranges=torch.Size([2, 64, 111, 111]),
origins={relu}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf2', layout=FixedLayout('cuda', torch.float32, size=[2, 64, 55, 55], stride=[193600, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.float32,
maximum(load(buf1, 224 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 223 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 222 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 113 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0))))))))),
ranges=[2, 64, 55, 55],
origins={max_pool2d_with_indices}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf3', layout=FlexibleLayout('cuda', torch.int64, size=[2, 64, 55, 55], stride=[193600, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.int64,
where(load(buf1, 224 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 223 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 222 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 113 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0)))))))), index_expr(224 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 223 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 222 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 113 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0))))))), index_expr(223 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 222 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 113 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0)))))), index_expr(222 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 113 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0))))), index_expr(113 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 112 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0)))), index_expr(112 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 111 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0))), index_expr(111 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 2 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > maximum(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0)), index_expr(2 + 2 * i3 + 222 * i2, torch.int64), where(load(buf1, 1 + 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0) > load(buf1, 2 * i3 + 222 * i2 + 12321 * i1 + 788544 * i0), index_expr(1 + 2 * i3 + 222 * i2, torch.int64), index_expr(2 * i3 + 222 * i2, torch.int64))))))))),
ranges=[2, 64, 55, 55],
origins={max_pool2d_with_indices}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf5', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 16, 55, 55]), stride=[48400, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf4, i3 + 55 * i2 + 3025 * i1 + 48400 * i0) + load(primals_4, i1)),
ranges=torch.Size([2, 16, 55, 55]),
origins={relu_1}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf10', layout=FixedLayout('cuda', torch.float32, size=[2, 128, 55, 55], stride=[387200, 3025, 55, 1]), inputs=[ComputedBuffer(name='buf8', layout=AliasedLayout('cuda', torch.float32, size=[2, 64, 55, 55], stride=[387200, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf6, i3 + 55 * i2 + 3025 * i1 + 193600 * i0) + load(primals_6, i1)),
ranges=torch.Size([2, 64, 55, 55]),
origins={relu_2}
)), ComputedBuffer(name='buf9', layout=AliasedLayout('cuda', torch.float32, size=[2, 64, 55, 55], stride=[387200, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf7, i3 + 55 * i2 + 3025 * i1 + 193600 * i0) + load(primals_8, i1)),
ranges=torch.Size([2, 64, 55, 55]),
origins={relu_3}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf12', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 16, 55, 55]), stride=[48400, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf11, i3 + 55 * i2 + 3025 * i1 + 48400 * i0) + load(primals_10, i1)),
ranges=torch.Size([2, 16, 55, 55]),
origins={relu_4}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf17', layout=FixedLayout('cuda', torch.float32, size=[2, 128, 55, 55], stride=[387200, 3025, 55, 1]), inputs=[ComputedBuffer(name='buf15', layout=AliasedLayout('cuda', torch.float32, size=[2, 64, 55, 55], stride=[387200, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf13, i3 + 55 * i2 + 3025 * i1 + 193600 * i0) + load(primals_12, i1)),
ranges=torch.Size([2, 64, 55, 55]),
origins={relu_5}
)), ComputedBuffer(name='buf16', layout=AliasedLayout('cuda', torch.float32, size=[2, 64, 55, 55], stride=[387200, 3025, 55, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf14, i3 + 55 * i2 + 3025 * i1 + 193600 * i0) + load(primals_14, i1)),
ranges=torch.Size([2, 64, 55, 55]),
origins={relu_6}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf18', layout=FixedLayout('cuda', torch.float32, size=[2, 128, 27, 27], stride=[93312, 729, 27, 1]), data=Pointwise(
'cuda',
torch.float32,
maximum(load(buf17, 112 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 111 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 110 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 57 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0))))))))),
ranges=[2, 128, 27, 27],
origins={max_pool2d_with_indices_1}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf19', layout=FlexibleLayout('cuda', torch.int64, size=[2, 128, 27, 27], stride=[93312, 729, 27, 1]), data=Pointwise(
'cuda',
torch.int64,
where(load(buf17, 112 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 111 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 110 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 57 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0)))))))), index_expr(112 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 111 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 110 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 57 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0))))))), index_expr(111 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 110 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 57 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0)))))), index_expr(110 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 57 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0))))), index_expr(57 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 56 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0)))), index_expr(56 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 55 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0))), index_expr(55 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 2 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > maximum(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0)), index_expr(2 + 2 * i3 + 110 * i2, torch.int64), where(load(buf17, 1 + 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0) > load(buf17, 2 * i3 + 110 * i2 + 3025 * i1 + 387200 * i0), index_expr(1 + 2 * i3 + 110 * i2, torch.int64), index_expr(2 * i3 + 110 * i2, torch.int64))))))))),
ranges=[2, 128, 27, 27],
origins={max_pool2d_with_indices_1}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf21', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 32, 27, 27]), stride=[23328, 729, 27, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf20, i3 + 27 * i2 + 729 * i1 + 23328 * i0) + load(primals_16, i1)),
ranges=torch.Size([2, 32, 27, 27]),
origins={relu_7}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf26', layout=FixedLayout('cuda', torch.float32, size=[2, 256, 27, 27], stride=[186624, 729, 27, 1]), inputs=[ComputedBuffer(name='buf24', layout=AliasedLayout('cuda', torch.float32, size=[2, 128, 27, 27], stride=[186624, 729, 27, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf22, i3 + 27 * i2 + 729 * i1 + 93312 * i0) + load(primals_18, i1)),
ranges=torch.Size([2, 128, 27, 27]),
origins={relu_8}
)), ComputedBuffer(name='buf25', layout=AliasedLayout('cuda', torch.float32, size=[2, 128, 27, 27], stride=[186624, 729, 27, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf23, i3 + 27 * i2 + 729 * i1 + 93312 * i0) + load(primals_20, i1)),
ranges=torch.Size([2, 128, 27, 27]),
origins={relu_9}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf28', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 32, 27, 27]), stride=[23328, 729, 27, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf27, i3 + 27 * i2 + 729 * i1 + 23328 * i0) + load(primals_22, i1)),
ranges=torch.Size([2, 32, 27, 27]),
origins={relu_10}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf33', layout=FixedLayout('cuda', torch.float32, size=[2, 256, 27, 27], stride=[186624, 729, 27, 1]), inputs=[ComputedBuffer(name='buf31', layout=AliasedLayout('cuda', torch.float32, size=[2, 128, 27, 27], stride=[186624, 729, 27, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf29, i3 + 27 * i2 + 729 * i1 + 93312 * i0) + load(primals_24, i1)),
ranges=torch.Size([2, 128, 27, 27]),
origins={relu_11}
)), ComputedBuffer(name='buf32', layout=AliasedLayout('cuda', torch.float32, size=[2, 128, 27, 27], stride=[186624, 729, 27, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf30, i3 + 27 * i2 + 729 * i1 + 93312 * i0) + load(primals_26, i1)),
ranges=torch.Size([2, 128, 27, 27]),
origins={relu_12}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf34', layout=FixedLayout('cuda', torch.float32, size=[2, 256, 13, 13], stride=[43264, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
maximum(load(buf33, 56 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 55 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 54 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 29 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0))))))))),
ranges=[2, 256, 13, 13],
origins={max_pool2d_with_indices_2}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf35', layout=FlexibleLayout('cuda', torch.int64, size=[2, 256, 13, 13], stride=[43264, 169, 13, 1]), data=Pointwise(
'cuda',
torch.int64,
where(load(buf33, 56 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 55 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 54 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 29 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0)))))))), index_expr(56 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 55 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 54 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 29 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0))))))), index_expr(55 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 54 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 29 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0)))))), index_expr(54 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 29 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0))))), index_expr(29 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 28 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0)))), index_expr(28 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 27 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0))), index_expr(27 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 2 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > maximum(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0)), index_expr(2 + 2 * i3 + 54 * i2, torch.int64), where(load(buf33, 1 + 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0) > load(buf33, 2 * i3 + 54 * i2 + 729 * i1 + 186624 * i0), index_expr(1 + 2 * i3 + 54 * i2, torch.int64), index_expr(2 * i3 + 54 * i2, torch.int64))))))))),
ranges=[2, 256, 13, 13],
origins={max_pool2d_with_indices_2}
))
)), TensorBox(StorageBox(
ComputedBuffer(name='buf37', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 48, 13, 13]), stride=[8112, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf36, i3 + 13 * i2 + 169 * i1 + 8112 * i0) + load(primals_28, i1)),
ranges=torch.Size([2, 48, 13, 13]),
origins={relu_13}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf42', layout=FixedLayout('cuda', torch.float32, size=[2, 384, 13, 13], stride=[64896, 169, 13, 1]), inputs=[ComputedBuffer(name='buf40', layout=AliasedLayout('cuda', torch.float32, size=[2, 192, 13, 13], stride=[64896, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf38, i3 + 13 * i2 + 169 * i1 + 32448 * i0) + load(primals_30, i1)),
ranges=torch.Size([2, 192, 13, 13]),
origins={relu_14}
)), ComputedBuffer(name='buf41', layout=AliasedLayout('cuda', torch.float32, size=[2, 192, 13, 13], stride=[64896, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf39, i3 + 13 * i2 + 169 * i1 + 32448 * i0) + load(primals_32, i1)),
ranges=torch.Size([2, 192, 13, 13]),
origins={relu_15}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf44', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 48, 13, 13]), stride=[8112, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf43, i3 + 13 * i2 + 169 * i1 + 8112 * i0) + load(primals_34, i1)),
ranges=torch.Size([2, 48, 13, 13]),
origins={relu_16}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf49', layout=FixedLayout('cuda', torch.float32, size=[2, 384, 13, 13], stride=[64896, 169, 13, 1]), inputs=[ComputedBuffer(name='buf47', layout=AliasedLayout('cuda', torch.float32, size=[2, 192, 13, 13], stride=[64896, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf45, i3 + 13 * i2 + 169 * i1 + 32448 * i0) + load(primals_36, i1)),
ranges=torch.Size([2, 192, 13, 13]),
origins={relu_17}
)), ComputedBuffer(name='buf48', layout=AliasedLayout('cuda', torch.float32, size=[2, 192, 13, 13], stride=[64896, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf46, i3 + 13 * i2 + 169 * i1 + 32448 * i0) + load(primals_38, i1)),
ranges=torch.Size([2, 192, 13, 13]),
origins={relu_18}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf51', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 64, 13, 13]), stride=[10816, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf50, i3 + 13 * i2 + 169 * i1 + 10816 * i0) + load(primals_40, i1)),
ranges=torch.Size([2, 64, 13, 13]),
origins={relu_19}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf56', layout=FixedLayout('cuda', torch.float32, size=[2, 512, 13, 13], stride=[86528, 169, 13, 1]), inputs=[ComputedBuffer(name='buf54', layout=AliasedLayout('cuda', torch.float32, size=[2, 256, 13, 13], stride=[86528, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf52, i3 + 13 * i2 + 169 * i1 + 43264 * i0) + load(primals_42, i1)),
ranges=torch.Size([2, 256, 13, 13]),
origins={relu_20}
)), ComputedBuffer(name='buf55', layout=AliasedLayout('cuda', torch.float32, size=[2, 256, 13, 13], stride=[86528, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf53, i3 + 13 * i2 + 169 * i1 + 43264 * i0) + load(primals_44, i1)),
ranges=torch.Size([2, 256, 13, 13]),
origins={relu_21}
))])
)), TensorBox(StorageBox(
ComputedBuffer(name='buf58', layout=FixedLayout('cuda', torch.float32, size=torch.Size([2, 64, 13, 13]), stride=[10816, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf57, i3 + 13 * i2 + 169 * i1 + 10816 * i0) + load(primals_46, i1)),
ranges=torch.Size([2, 64, 13, 13]),
origins={relu_22}
))
)), TensorBox(StorageBox(
ConcatKernel(name='buf63', layout=FixedLayout('cuda', torch.float32, size=[2, 512, 13, 13], stride=[86528, 169, 13, 1]), inputs=[ComputedBuffer(name='buf61', layout=AliasedLayout('cuda', torch.float32, size=[2, 256, 13, 13], stride=[86528, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf59, i3 + 13 * i2 + 169 * i1 + 43264 * i0) + load(primals_48, i1)),
ranges=torch.Size([2, 256, 13, 13]),
origins={relu_23}
)), ComputedBuffer(name='buf62', layout=AliasedLayout('cuda', torch.float32, size=[2, 256, 13, 13], stride=[86528, 169, 13, 1]), data=Pointwise(
'cuda',
torch.float32,
relu(load(buf60, i3 + 13 * i2 + 169 * i1 + 43264 * i0) + load(primals_50, i1)),
ranges=torch.Size([2, 256, 13, 13]),
origins={relu_24}
))])
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
relu(load(buf64, i3 + 13 * i2 + 169 * i1 + 169000 * i0) + load(primals_52, i1)) <= constant(0, torch.float32),
ranges=torch.Size([2, 1000, 13, 13]),
origins={le}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf62, i3 + 13 * i2 + 169 * i1 + 86528 * i0) <= constant(0, torch.float32),
ranges=[2, 256, 13, 13],
origins={le_1}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf61, i3 + 13 * i2 + 169 * i1 + 86528 * i0) <= constant(0, torch.float32),
ranges=[2, 256, 13, 13],
origins={le_2}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf55, i3 + 13 * i2 + 169 * i1 + 86528 * i0) <= constant(0, torch.float32),
ranges=[2, 256, 13, 13],
origins={le_4}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf54, i3 + 13 * i2 + 169 * i1 + 86528 * i0) <= constant(0, torch.float32),
ranges=[2, 256, 13, 13],
origins={le_5}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf48, i3 + 13 * i2 + 169 * i1 + 64896 * i0) <= constant(0, torch.float32),
ranges=[2, 192, 13, 13],
origins={le_7}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf47, i3 + 13 * i2 + 169 * i1 + 64896 * i0) <= constant(0, torch.float32),
ranges=[2, 192, 13, 13],
origins={le_8}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf41, i3 + 13 * i2 + 169 * i1 + 64896 * i0) <= constant(0, torch.float32),
ranges=[2, 192, 13, 13],
origins={le_10}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf40, i3 + 13 * i2 + 169 * i1 + 64896 * i0) <= constant(0, torch.float32),
ranges=[2, 192, 13, 13],
origins={le_11}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf32, i3 + 27 * i2 + 729 * i1 + 186624 * i0) <= constant(0, torch.float32),
ranges=[2, 128, 27, 27],
origins={le_13}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf31, i3 + 27 * i2 + 729 * i1 + 186624 * i0) <= constant(0, torch.float32),
ranges=[2, 128, 27, 27],
origins={le_14}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf25, i3 + 27 * i2 + 729 * i1 + 186624 * i0) <= constant(0, torch.float32),
ranges=[2, 128, 27, 27],
origins={le_16}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf24, i3 + 27 * i2 + 729 * i1 + 186624 * i0) <= constant(0, torch.float32),
ranges=[2, 128, 27, 27],
origins={le_17}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf16, i3 + 55 * i2 + 3025 * i1 + 387200 * i0) <= constant(0, torch.float32),
ranges=[2, 64, 55, 55],
origins={le_19}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf15, i3 + 55 * i2 + 3025 * i1 + 387200 * i0) <= constant(0, torch.float32),
ranges=[2, 64, 55, 55],
origins={le_20}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf9, i3 + 55 * i2 + 3025 * i1 + 387200 * i0) <= constant(0, torch.float32),
ranges=[2, 64, 55, 55],
origins={le_22}
)
)), TensorBox(StorageBox(
Pointwise(
'cuda',
torch.bool,
load(buf8, i3 + 55 * i2 + 3025 * i1 + 387200 * i0) <= constant(0, torch.float32),
ranges=[2, 64, 55, 55],
origins={le_23}
)
)), s0, 13, 13]
While executing return [view, primals_1, primals_3, primals_5, primals_7, primals_9, primals_11, primals_13, primals_15, primals_17, primals_19, primals_21, primals_23, primals_25, primals_27, primals_29, primals_31, primals_33, primals_35, primals_37, primals_39, primals_41, primals_43, primals_45, primals_47, primals_49, primals_51, primals_53, relu, getitem, getitem_1, relu_1, cat, relu_4, cat_1, getitem_2, getitem_3, relu_7, cat_2, relu_10, cat_3, getitem_4, getitem_5, relu_13, cat_4, relu_16, cat_5, relu_19, cat_6, relu_22, cat_7, le, le_1, le_2, le_4, le_5, le_7, le_8, le_10, le_11, le_13, le_14, le_16, le_17, le_19, le_20, le_22, le_23, sym_size, sym_size_1, sym_size_2]
Original traceback:
None
TorchDynamo optimized model failed to run because of following error
cuda train squeezenet1_1 FAIL
Running torchbench.py tacotron2...
ERROR:common:Cannot call sizes() on tensor with symbolic sizes/strides
While executing %lowmem_dropout_2 : [#users=1] = call_function[target=torch._inductor.overrides.lowmem_dropout](args = (%relu, 0.5, True), kwargs = {})
Original traceback:
None
Traceback (most recent call last):
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1122, in check_accuracy
new_result = optimized_model_iter_fn(
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/common.py", line 1020, in run_n_iterations
self.model_iter_fn(mod, inputs, collect_outputs=False)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 332, in forward_and_backward_pass
cloned_inputs = clone_inputs(inputs)
File "/scratch/ezyang/work/pytorch/benchmarks/dynamo/torchbench.py", line 335, in <graph break in forward_and_backward_pass>
pred = mod(*cloned_inputs)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/tacotron2/model.py", line 505, in forward
encoder_outputs = self.encoder(embedded_inputs, text_lengths)
File "/scratch/ezyang/work/pytorch/torch/nn/modules/module.py", line 1423, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/ezyang/work/torchbenchmark/torchbenchmark/models/tacotron2/model.py", line 173, in forward
def forward(self, x, input_lengths):
File "/scratch/ezyang/work/pytorch/torch/_dynamo/eval_frame.py", line 173, in _fn
return fn(*args, **kwargs)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 954, in forward
return compiled_f(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 940, in new_func
compiled_fn = create_aot_dispatcher_function(
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 660, in create_aot_dispatcher_function
aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
File "/scratch/ezyang/work/pytorch/functorch/_src/aot_autograd.py", line 462, in aot_dispatch_autograd
out = flat_fn(*flat_args)
File "/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment