David Berard (davidberard98)

  • Facebook
  • Menlo Park, CA
/home/dberard/local/pytorch/torch/backends/cudnn/__init__.py:106: UserWarning: PyTorch was compiled without cuDNN/MIOpen support. To use cuDNN/MIOpen, rebuild PyTorch making sure the library is visible to the build system.
warnings.warn(
/home/dberard/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/z3/z3core.py:5: DeprecationWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html
import pkg_resources
/home/dberard/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pkg_resources/__init__.py:2871: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('ruamel')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See ht
E
======================================================================
ERROR: test_torch_function_call_to_size_within_aot_autograd_graph (__main__.TestNestedTensor.test_torch_function_call_to_size_within_aot_autograd_graph)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/data/users/dberard/pytorch/torch/testing/_internal/common_utils.py", line 2739, in wrapper
    method(*args, **kwargs)
  File "/data/users/dberard/pytorch/test/dynamo/test_subclasses.py", line 1403, in test_torch_function_call_to_size_within_aot_autograd_graph
    compiled_fn(x, y)
  File "/data/users/dberard/pytorch/torch/_dynamo/eval_frame.py", line 451, in _fn
davidberard98 / code.cpp
Last active March 14, 2024 17:21
bad hashing with flat_hash_map
/*
* g++ code.cpp -o code -std=c++17
*
* this demonstrates exponential memory usage with flat_hash_map
*/
#include <iostream>
#include "flat_hash_map.hpp"
struct BadHash {
// -----// IR Dump Before Inliner (inline) ('builtin.module' operation) //----- //
#loc = loc("/tmp/torchinductor_dberard/yl/cylyrnw2l3cnpot655rarv4ha622jmc4jnwrpsntqcnq4difzu25.py":18:0)
#loc62 = loc("/home/dberard/local/pytorch/torch/_inductor/triton_helpers.py":90:0)
#loc64 = loc(unknown)
#loc66 = loc("/home/dberard/local/pytorch/torch/_inductor/triton_helpers.py":69:0)
#loc84 = loc("/home/dberard/local/pytorch/torch/_inductor/triton_helpers.py":12:0)
#loc87 = loc("/home/dberard/local/pytorch/torch/_inductor/triton_helpers.py":6:0)
module {
tt.func public @triton__0d1d23de(%arg0: !tt.ptr<f64, 1> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/yl/cylyrnw2l3cnpot655rarv4ha622jmc4jnwrpsntqcnq4difzu25.py":18:0), %arg1: !tt.ptr<i64, 1> {tt.divisibility = 16 : i32} loc("/tmp/torchinductor_dberard/yl/cylyrnw2l3cnpot655rarv4ha622jmc4jnwrpsntqcnq4difzu25.py":18:0), %arg2: i32 loc("/tmp/torchinductor_dberard/yl/cylyrnw2l3cnpot655rarv4ha622jmc4jnwrpsntqcnq4difzu25.py":18:0), %arg3: i32 {tt.divisibility
loading model: 0it [00:00, ?it/s]WARNING:common:Model detectron2_fcos_r_50_fpn does not support bfloat16, running with amp instead
loading model: 0it [00:08, ?it/s]
WARNING:common:Model detectron2_fcos_r_50_fpn does not support bfloat16, running with amp instead
cuda eval detectron2_fcos_r_50_fpn
WARNING:common:Model detectron2_fcos_r_50_fpn does not support bfloat16, running with amp instead
[2023-11-01 20:50:22,157] torch._dynamo.eval_frame: [DEBUG] skipping helper /home/dberard/local/miniconda3/envs/pytorch/lib/python3.10/contextlib.py
[2023-11-01 20:50:22,157] torch._dynamo.eval_frame: [DEBUG] skipping __init__ /home/dberard/local/miniconda3/envs/pytorch/lib/python3.10/contextlib.py
[2023-11-01 20:50:22,157] torch._dynamo.eval_frame: [DEBUG] skipping __enter__ /home/dberard/local/miniconda3/envs/pytorch/lib/python3.10/contextlib.py
[2023-11-01 20:50:22,157] torch._dynamo.eval_frame: [DEBUG] skipping backend_cache_wrapper /home/dberard/local/pytorch/torch/_dynamo/eval_frame.py
[2023-11-01 20:
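The benchmark log notes that detectron2_fcos_r_50_fpn does not support bfloat16 and falls back to amp. A minimal sketch of that kind of fallback, using a stand-in module rather than the detectron2 model:

import torch

# Stand-in module; the real run uses detectron2_fcos_r_50_fpn.
model = torch.nn.Linear(64, 64).cuda()
x = torch.randn(8, 64, device="cuda")

# amp fallback: run the forward pass under autocast instead of casting the
# model weights to bfloat16 directly.
with torch.autocast(device_type="cuda", dtype=torch.float16):
    out = model(x)
print(out.dtype)  # torch.float16: matmul-heavy ops ran in reduced precision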
/data/users/dberard/scripts/oncall/112502.py:7: UserWarning: An output with one or more elements was resized since it had shape [9, 10], which does not match the required output shape [9]. This behavior is deprecated, and in a future PyTorch release outputs will not be resized unless they have zero elements. You can explicitly reuse an out tensor t by resizing it, inplace, to zero elements with t.resize_(0). (Triggered internally at ../aten/src/ATen/native/Resize.cpp:28.)
x = torch.diag(input=x, diagonal=0,out=torch.rand([9, 10], dtype=torch.float32).to('cpu'))
build succeeded
/data/users/dberard/pytorch/torch/_prims_common/wrappers.py:159: UserWarning: An output with one or more elements was resized since it had shape torch.Size([9, 10]) which does not match the required output shape {str(shape)}. This behavior is deprecated, and in a future PyTorch release outputs will not be resized unless they have zero elements. You can explicitly reuse an out tensor t by resizing it, inplace, to zero elements with t.re
/data/users/dberard/scripts/oncall/112494.py:6: UserWarning: An output with one or more elements was resized since it had shape [10, 9, 8], which does not match the required output shape [1, 9, 8]. This behavior is deprecated, and in a future PyTorch release outputs will not be resized unless they have zero elements. You can explicitly reuse an out tensor t by resizing it, inplace, to zero elements with t.resize_(0). (Triggered internally at ../aten/src/ATen/native/Resize.cpp:28.)
x = torch.var(correction=4, dim=0, input=x, keepdim=True, out=torch.rand_like(x))
/data/users/dberard/pytorch/torch/_prims_common/wrappers.py:159: UserWarning: An output with one or more elements was resized since it had shape torch.Size([s0, s1, s2]) which does not match the required output shape {str(shape)}. This behavior is deprecated, and in a future PyTorch release outputs will not be resized unless they have zero elements. You can explicitly reuse an out tensor t by resizing it, inplace, to zero elements with t.resize_(0).
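Both warnings point at the same fix: reuse an out= tensor by first resizing it to zero elements so the op is free to resize it. A minimal sketch of that pattern, with an input shape assumed to match the first warning:

import torch

# Assumed input: a 2D tensor whose diagonal has 9 elements, matching the
# "required output shape [9]" in the first warning.
x = torch.randn(9, 10)

out = torch.rand(9, 10)             # wrong shape for the result of torch.diag(x)
out.resize_(0)                      # shrink to zero elements, as the warning suggests
torch.diag(x, diagonal=0, out=out)  # out is resized silently to shape [9], no warning
print(out.shape)                    # torch.Size([9])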
Traceback (most recent call last):
  File "/data/users/dberard/scripts/oncall/112489.py", line 8, in <module>
    fn_opt(*inputs)
  File "/data/users/dberard/pytorch/torch/_dynamo/eval_frame.py", line 411, in _fn
    return fn(*args, **kwargs)
  File "/data/users/dberard/pytorch/torch/_dynamo/eval_frame.py", line 559, in catch_errors
    return callback(frame, cache_entry, hooks, frame_state)
  File "/data/users/dberard/pytorch/torch/_dynamo/convert_frame.py", line 687, in _convert_frame
    result = inner_convert(frame, cache_entry, hooks, frame_state)
  File "/data/users/dberard/pytorch/torch/_dynamo/convert_frame.py", line 148, in _fn
import torch

def fn(x, y):
    return torch.cat([x + y, y]).sin()

a = torch.ones((1024, 256), dtype=torch.float32)
b = torch.ones((1024, 256), dtype=torch.float32) * 2

with torch.profiler.profile(schedule=torch.profiler.schedule(wait=2, warmup=2, repeat=1, active=2), record_shapes=True) as prof:
    for _ in range(8):
        # assumed loop body (the gist preview cuts off here): run fn and step the profiler schedule
        fn(a, b)
        prof.step()
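With wait=2, warmup=2, active=2 and repeat=1, only two of the eight iterations fall in the active window and get recorded. A common way to inspect the captured steps afterwards (not shown in the gist; the sort key and row limit are illustrative choices):

# key_averages() summarizes the recorded ops across the active steps.
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))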
import torch
import triton
import triton.language as tl
@triton.jit
def dense_to_jagged_triton(
    inverse_offsets_ptr, offsets_ptr, dense_ptr, out_ptr0, xnumel, XBLOCK: tl.constexpr
):
    # xnumel = 33106688
    xoffset = tl.program_id(0) * XBLOCK