--> python benchmarks/dynamo/torchbench.py --accuracy --float32 -d cpu -n10 --training --only resnet50_quantized_qat --cold-start-latency --backend=aot_eager
loading model: 0it [00:13, ?it/s]
cpu train resnet50_quantized_qat
WARNING:common:fp64 golden ref were not generated for resnet50_quantized_qat. Setting accuracy check to cosine
WARNING:common:Trying to call the empty_gpu_cache for device: cpu, which is not in list [cuda, xpu]
WARNING:common:Trying to call the empty_gpu_cache for device: cpu, which is not in list [cuda, xpu]
WARNING:common:Trying to call the empty_gpu_cache for device: cpu, which is not in list [cuda, xpu]
ERROR:common:
Traceback (most recent call last):
  File "/data/users/mlazos/pytorch/benchmarks/dynamo/common.py", line 2326, in check_accuracy
linear_compiled = torch.compile(linear, backend="inductor")
with config.patch(
    {
        "max_autotune": True,
        "max_autotune_gemm_backends": "TRITON",
        "cuda.cutlass_max_profiling_configs": 2,
        "fx_graph_cache": False,
        "fx_graph_remote_cache": False,
        "cuda.enable_caching_codegen": True,
    }
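The preview cuts off inside the config.patch(...) call. For context, a self-contained sketch of how this pattern is typically used, assuming config is torch._inductor.config and using made-up module and input shapes (only a subset of the keys from the snippet above; a CUDA/Triton setup is assumed):

import torch
from torch._inductor import config

# Hypothetical module and input; the preview does not show how `linear` is built.
linear = torch.nn.Linear(64, 64, device="cuda")
x = torch.randn(8, 64, device="cuda")

linear_compiled = torch.compile(linear, backend="inductor")
with config.patch(
    {
        "max_autotune": True,
        "max_autotune_gemm_backends": "TRITON",
        "fx_graph_cache": False,
        "fx_graph_remote_cache": False,
    }
):
    # The first call compiles under the patched inductor config: max-autotune
    # GEMMs restricted to Triton, with the FX graph caches disabled so codegen
    # runs fresh each time.
    out = linear_compiled(x)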
def test_tuple_return(self):
    def inner_fn(x, y):
        x0 = x + x + 1
        y0 = y + y + 1
        return x0, y0

    def fn(x0, x1, x2, y0, y1, y2):
        x0 = inner_fn(x0, y0)
        x1 = inner_fn(x1, y1)
        x2 = inner_fn(x2, y2)
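The test body is truncated here. A hedged completion of the same pattern as a standalone script (the return statement, backend choice, and comparison are assumptions, not the original test):

import torch

def inner_fn(x, y):
    x0 = x + x + 1
    y0 = y + y + 1
    return x0, y0

def fn(x0, x1, x2, y0, y1, y2):
    x0 = inner_fn(x0, y0)
    x1 = inner_fn(x1, y1)
    x2 = inner_fn(x2, y2)
    # Assumed: return the three result tuples so the caller can check them.
    return x0, x1, x2

inputs = [torch.randn(4) for _ in range(6)]
expected = fn(*inputs)
# aot_eager keeps the sketch lightweight; the original test may target inductor.
actual = torch.compile(fn, backend="aot_eager", fullgraph=True)(*inputs)
for expected_pair, actual_pair in zip(expected, actual):
    for exp, act in zip(expected_pair, actual_pair):
        torch.testing.assert_close(exp, act)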
import torch
import triton
import triton.language as tl
import helion
import helion.language as hl

@helion.kernel
def layer_norm_fwd(
    x: torch.Tensor,
    normalized_shape: list[int],
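Only the kernel signature survives in this preview. For readers unfamiliar with Helion, the basic shape of a kernel is a @helion.kernel-decorated function that iterates over hl.tile(...) blocks; the sketch below is adapted from Helion's public add example, not reconstructed from this gist:

import torch
import helion
import helion.language as hl

@helion.kernel()
def add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    # Match PyTorch broadcasting rules, then allocate the output.
    x, y = torch.broadcast_tensors(x, y)
    out = torch.empty_like(x)
    # Each loop iteration handles one tile of the output; Helion autotunes the tiling.
    for tile in hl.tile(out.size()):
        out[tile] = x[tile] + y[tile]
    return out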
    best = self._autotune()
           ^^^^^^^^^^^^^^^^
  File "/data/users/mlazos/helion/helion/autotuner/differential_evolution.py", line 97, in _autotune
    self.initial_two_generations()
  File "/data/users/mlazos/helion/helion/autotuner/differential_evolution.py", line 59, in initial_two_generations
    self.parallel_benchmark_flat(
  File "/data/users/mlazos/helion/helion/autotuner/base_search.py", line 359, in parallel_benchmark_flat
    to_check, configs, self.parallel_benchmark(configs), strict=True
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/users/mlazos/helion/helion/autotuner/base_search.py", line 227, in parallel_benchmark
--> python helion_welford.py
Traceback (most recent call last):
  File "/data/users/mlazos/test/helion_welford.py", line 53, in <module>
    print(layer_norm_fwd(torch.ones(2, 2, device="cuda"), [2], torch.ones(2, device="cuda"), torch.ones(2, device="cuda")))
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/users/mlazos/helion/helion/runtime/kernel.py", line 272, in __call__
    return self.bind(args)(*args)
           ^^^^^^^^^^^^^^^
  File "/data/users/mlazos/helion/helion/runtime/kernel.py", line 156, in bind
    bound_kernel = self.bind(normalized_args)
import torch
from torch import tensor, device
import torch.fx as fx
from torch._dynamo.testing import rand_strided
from math import inf
import torch._inductor.inductor_prims

torch.backends.cuda.matmul.fp32_precision = 'tf32'
torch.backends.cuda.matmul.allow_tf32 = True
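The two settings above opt float32 matmuls into TensorFloat-32. A quick sketch (hypothetical shapes, Ampere-or-newer GPU assumed) of observing the numerical difference the flag introduces:

import torch

a = torch.randn(512, 512, device="cuda")
b = torch.randn(512, 512, device="cuda")

torch.backends.cuda.matmul.allow_tf32 = True
tf32_out = a @ b  # may use TensorFloat-32 internally

torch.backends.cuda.matmul.allow_tf32 = False
fp32_out = a @ b  # full float32 precision

# TF32 drops mantissa bits in exchange for speed, so a small difference is expected.
print((tf32_out - fp32_out).abs().max().item())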
--> python benchmarks/dynamo/torchbench.py --accuracy --float32 -d cpu -n10 --training --only resnet50_quantized_qat --cold-start-latency --backend=inductor
loading model: 0it [00:22, ?it/s]
cpu train resnet50_quantized_qat
WARNING:common:fp64 golden ref were not generated for resnet50_quantized_qat. Setting accuracy check to cosine
WARNING:common:Trying to call the empty_gpu_cache for device: cpu, which is not in list [cuda, xpu]
WARNING:common:Trying to call the empty_gpu_cache for device: cpu, which is not in list [cuda, xpu]
WARNING:common:Trying to call the empty_gpu_cache for device: cpu, which is not in list [cuda, xpu]
ERROR:common:
Traceback (most recent call last):
  File "/data/users/mlazos/pytorch/benchmarks/dynamo/common.py", line 2236, in check_accuracy