James Wu jamesjwu

## gist:b47f69d0215e0ef6dc85e0d8091b5f20

loading model: 0it [00:00, ?it/s]
loading model: 0it [00:03, ?it/s]
ERROR:common:Backend dynamo failed in warmup()
Traceback (most recent call last):
  File "/data/users/jjwu/a/pytorch/benchmarks/dynamo/common.py", line 2561, in warmup
    fn(model, example_inputs)
  File "/data/users/jjwu/a/pytorch/torch/_dynamo/eval_frame.py", line 804, in compile_wrapper
    return fn(*args, **kwargs)
  File "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", line 469, in forward_and_backward_pass

## Triton benchmark
import time

import torch
import triton
from torch._dynamo.device_interface import get_interface_for_device
from torch._inductor.runtime.static_cuda_launcher import StaticallyLaunchedCudaKernel

import sys
import os

## gist:7fe2723f3803ded2c2e81ba072fd16da
import time

import torch
import torch._inductor.config as config
from torch import Tensor
from torch._dynamo.device_interface import get_interface_for_device
from torch._inductor.runtime.static_cuda_launcher import StaticallyLaunchedCudaKernel
from torch._inductor.runtime.triton_compat import tl, triton

# Constants

## gist:afa19335c0aee85b24546b13c1cf6427
{
  "cache_state": "miss",
  "cache_status_detailed": "guard_miss",
  "cache_status_guard_expr": "L['t1']*L['t0'] < 2147483648 and L['t2']*L['t3'] < 2147483648 and 2 <= L['t0'] and 2 <= L['t1'] and 2 <= L['t2'] and 2 <= L['t3']",
  "components": [
    "[f5mdxkfzriesfvzu264hheibpxmhcat7d3an75ymc4edjb5bagj] aot_config: (0, True, False, False, False, [TensorPropertySource(base=LocalSource(local_name='x', is_input=True, dynamism=None, is_derefed_cell_contents=False), prop=<TensorProperty.SIZE: 0>, idx=0), TensorPropertySource(base=LocalSource(local_name='x', is_input=True, dynamism=None, is_derefed_cell_contents=False), prop=<TensorProperty.SIZE: 0>, idx=1), LocalSource(local_name='x', is_input=True, dynamism=None, is_derefed_cell_contents=False), TensorPropertySource(base=LocalSource(local_name='y', is_input=True, dynamism=None, is_derefed_cell_contents=False), prop=<TensorProperty.SIZE: 0>, idx=0), TensorPropertySource(base=LocalSource(local_name='y', is_input=True, dynamism=None, is_derefed_cell_contents=False

## gist:e8678af012a4f7bf3ccc5449e8725d5e
{
  "compilation_metrics_runtime": {
    "compile_id": "0/0",
    "frame_key": null,
    "co_name": null,
    "co_filename": null,
    "co_firstlineno": null,
    "cache_size": null,
    "accumulated_cache_size": null,
    "guard_count": null,

## microbenchmark.py
import torch
import time
import torch._inductor.config as config
from torch import Tensor
from torch._inductor.runtime.triton_compat import tl, triton

sp = time.time()
N = 100
template = """
def nop_kernel_{i}(arg0, arg1):

## gist:a76490e55e27641697cc60f172cab908
Triton compilation failed: triton_poi_fused_0
defb844cc147', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': False, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
    min_elem_per_thread=0
)
@triton.jit
def triton_(in_ptr0, out_ptr0, ynumel, xnumel, YBLOCK : tl.constexpr, XBLOCK : tl.constexpr):
    ynumel = 192
    xnumel = 49
    yoffset = tl.program_id(1) * (tl.program_id(2) + 1) * YBLOCK
    yindex = yoffset + tl.arange(0, YBLOCK)[None, :]

## .bashrc
# If not running interactively, don't do anything
case $- in
    *i*) ;;
      *) return;;
esac

# don't put duplicate lines or lines starting with space in the history.
# See bash(1) for more options
HISTCONTROL=ignoreboth

## gist:e5012c9ae4c22b3cb8a14cbabfff198e
# If not running interactively, don't do anything
case $- in
    *i*) ;;
      *) return;;
esac

# don't put duplicate lines or lines starting with space in the history.
# See bash(1) for more options
HISTCONTROL=ignoreboth

	loading model: 0it [00:00, ?it/s]
	loading model: 0it [00:03, ?it/s]
	ERROR:common:Backend dynamo failed in warmup()
	Traceback (most recent call last):
	File "/data/users/jjwu/a/pytorch/benchmarks/dynamo/common.py", line 2561, in warmup
	fn(model, example_inputs)
	File "/data/users/jjwu/a/pytorch/torch/_dynamo/eval_frame.py", line 804, in compile_wrapper
	return fn(*args, **kwargs)
	File "/data/users/jjwu/a/pytorch/benchmarks/dynamo/torchbench.py", line 469, in forward_and_backward_pass
	import time

	import torch
	import triton
	from torch._dynamo.device_interface import get_interface_for_device
	from torch._inductor.runtime.static_cuda_launcher import StaticallyLaunchedCudaKernel

	import sys
	import os
	import time

	import torch
	import torch._inductor.config as config
	from torch import Tensor
	from torch._dynamo.device_interface import get_interface_for_device
	from torch._inductor.runtime.static_cuda_launcher import StaticallyLaunchedCudaKernel
	from torch._inductor.runtime.triton_compat import tl, triton

	# Constants
	{
	"cache_state": "miss",
	"cache_status_detailed": "guard_miss",
	"cache_status_guard_expr": "L['t1']*L['t0'] < 2147483648 and L['t2']*L['t3'] < 2147483648 and 2 <= L['t0'] and 2 <= L['t1'] and 2 <= L['t2'] and 2 <= L['t3']",
	"components": [
	"[f5mdxkfzriesfvzu264hheibpxmhcat7d3an75ymc4edjb5bagj] aot_config: (0, True, False, False, False, [TensorPropertySource(base=LocalSource(local_name='x', is_input=True, dynamism=None, is_derefed_cell_contents=False), prop=<TensorProperty.SIZE: 0>, idx=0), TensorPropertySource(base=LocalSource(local_name='x', is_input=True, dynamism=None, is_derefed_cell_contents=False), prop=<TensorProperty.SIZE: 0>, idx=1), LocalSource(local_name='x', is_input=True, dynamism=None, is_derefed_cell_contents=False), TensorPropertySource(base=LocalSource(local_name='y', is_input=True, dynamism=None, is_derefed_cell_contents=False), prop=<TensorProperty.SIZE: 0>, idx=0), TensorPropertySource(base=LocalSource(local_name='y', is_input=True, dynamism=None, is_derefed_cell_contents=False
	{
	"compilation_metrics_runtime": {
	"compile_id": "0/0",
	"frame_key": null,
	"co_name": null,
	"co_filename": null,
	"co_firstlineno": null,
	"cache_size": null,
	"accumulated_cache_size": null,
	"guard_count": null,
	Triton compilation failed: triton_poi_fused_0
	defb844cc147', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': True, 'autotune_remote_cache': False, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
	min_elem_per_thread=0
	)
	@triton.jit
	def triton_(in_ptr0, out_ptr0, ynumel, xnumel, YBLOCK : tl.constexpr, XBLOCK : tl.constexpr):
	ynumel = 192
	xnumel = 49
	yoffset = tl.program_id(1) * (tl.program_id(2) + 1) * YBLOCK
	yindex = yoffset + tl.arange(0, YBLOCK)[None, :]
	# If not running interactively, don't do anything
	case $- in
	*i*) ;;
	*) return;;
	esac

	# don't put duplicate lines or lines starting with space in the history.
	# See bash(1) for more options
	HISTCONTROL=ignoreboth