Animesh Jain (anijain2305) · Meta Platforms
WARNING: torch sampled_addmm does not support batch indices. Benchmarked by iterating over batches. Can be improved significantly
Swin Transformer
[----------------------------------------------------------- sddmm ------------------------------------------------------------]
                                       |  torch_dense  |  torch_sddmm  |  csr_sputnik  |   csr_ge   |   coo_ge  |  csr_to_coo
1 threads: ---------------------------------------------------------------------------------------------------------------------
  B=  96, M=3136, K= 32, prob=0.0000   |     9532.0    |    47358.3    |    20816.9    |  106767.1  |  44071.4  |    259.6
  B= 192, M= 784, K= 32, prob=0.7500   |     1168.4    |     9095.9    |      728.3    |    2942.2  |   1480.8  |    117.1
  B= 384, M= 196, K= 32, prob=0.9375   |      171.0    |    17444.5    |      103.0    |      91.8  |    148.3  |    117.3
  B= 768, M=  49, K= 32, prob=0.9844   |       78.6    |    34361.4    |       18.7    |      12.8  |    145.
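The warning above refers to torch.sparse.sampled_addmm accepting only 2-D sparse CSR inputs, so the torch_sddmm column was measured with a Python-level loop over the batch dimension, which is consistent with it falling further behind as B grows. A minimal sketch of that per-batch workaround (names and shapes are illustrative, not the benchmark's actual harness):

import torch

def batched_sampled_addmm(masks, a, b):
    # masks: B two-dimensional sparse CSR tensors giving the sampling pattern
    # a: (B, M, K) dense, b: (B, K, M) dense
    # sampled_addmm has no batched variant here, so loop over B in Python;
    # the per-iteration overhead is what the warning calls out.
    return [torch.sparse.sampled_addmm(masks[i], a[i], b[i])
            for i in range(a.size(0))]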
import importlib
import gc
import os
import sys
import logging
import torch
from os.path import abspath
from os.path import exists
import itertools
(/scratch/anijain/work/env) anijain@a100-st-p4d24xlarge-58 /scratch/anijain/work/torchdynamo (wconstab/dynamic) $ python benchmarks/torchbench.py --dynamic_shapes --training --nvfuser --accuracy-aot-ts-mincut --devices cuda --repeat 1 -k hf_Bert
cuda train hf_Bert DEBUG torchdynamo.optimizations.training: Unable to use AOT Autograd because graph has mutation
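Here "graph has mutation" means the FX graph that TorchDynamo captured contains in-place ops, which this AOT Autograd path cannot handle, so it falls back for each such subgraph. A toy illustration of the kind of graph that trips this check (hf_Bert's real subgraphs are of course much larger):

import torch
import torch.fx as fx

def f(x):
    x.add_(1)        # in-place mutation of an input tensor
    return x.relu()

# The traced graph contains a call_method node for add_, i.e. a mutation.
print(fx.symbolic_trace(f).graph)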
#!/bin/bash
# Recreate binbao/update_pytorch_pin on top of main and bump the PyTorch
# nightly pin in the Makefile and README to today's date.
set -x -e
cd /data/home/binbao/cluster/torchdynamo-update-pin/
git checkout main && git pull
git branch -D binbao/update_pytorch_pin 2>/dev/null || true  # fine if the branch doesn't exist yet
git checkout -b binbao/update_pytorch_pin
nightly=$(date +'%Y%m%d')
# Rewrite "PYTORCH_VERSION ?= dev<YYYYMMDD>" in the Makefile ...
sed -i 's/PYTORCH_VERSION ?= dev.*/PYTORCH_VERSION ?= dev'"$nightly"'/' Makefile
# ... and every ".dev<YYYYMMDD>+" version string in the README.
sed -i 's/\.dev.*+/\.dev'"$nightly"'+/g' README.md
git commit -a -m "Update PyTorch pin"
git push -f  # the branch is rebuilt from main each run, so force-push is required
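Pinning by date stamp (dev<YYYYMMDD>) keeps the Makefile and the version strings in README.md pointing at the same PyTorch nightly, and rebuilding the branch from main on every run is what makes the final git push -f necessary.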
import torch
from torch import tensor, device
import torch.fx as fx
from torchdynamo.testing import rand_strided
from math import inf
from torch.fx.experimental.proxy_tensor import make_fx
# torch version: 1.13.0a0+git071f875
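These imports are the standard preamble of the minified repros that follow: rand_strided builds input tensors with explicit sizes and strides, and make_fx traces a function into an FX graph of ATen ops. A minimal, hypothetical use of make_fx, continuing the imports above:

def f(x):
    return torch.sin(x) + x

gm = make_fx(f)(torch.randn(4))  # trace f into a GraphModule of aten ops
print(gm.graph)                  # shows aten.sin / aten.add nodes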
import triton
import triton.language as tl
from torchinductor.ir import ReductionHint
from torchinductor.triton_ops.autotune import pointwise
from torchinductor.utils import instance_descriptor
@pointwise(size_hints=[4194304], filename=__file__, meta={'signature': {0: '*fp32', 1: '*i64', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32',
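The decorated kernel body is cut off above by the gist preview. For orientation, an inductor-generated pointwise kernel of this style has roughly the following shape; this is a hand-written sketch with far fewer pointer arguments, and the meta values are illustrative rather than the truncated kernel's real contents:

@pointwise(size_hints=[1024], filename=__file__,
           meta={'signature': {0: '*fp32', 1: '*fp32', 2: 'i32'},
                 'device': 0, 'constants': {},
                 'configs': [instance_descriptor(divisible_by_16=(0, 1, 2), equal_to_1=())]})
@triton.jit
def kernel(in_ptr0, out_ptr0, xnumel, XBLOCK: tl.constexpr):
    xoffset = tl.program_id(0) * XBLOCK
    xindex = xoffset + tl.arange(0, XBLOCK)[:]
    xmask = xindex < xnumel
    tmp0 = tl.load(in_ptr0 + xindex, xmask)
    tmp1 = tmp0 + 1.0              # the fused pointwise computation
    tl.store(out_ptr0 + xindex, tmp1, xmask)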
from ctypes import c_void_p, c_long
import torch
import random
from torch import empty_strided, as_strided, device
from torchinductor.codecache import AsyncCompile
aten = torch.ops.aten
async_compile = AsyncCompile()
import triton
import triton.language as tl
from torchinductor.ir import ReductionHint
from torchinductor.triton_ops.autotune import pointwise
from torchinductor.utils import instance_descriptor
@pointwise(size_hints=[4194304], filename=__file__, meta={'signature': {0: '*fp32', 1: '*i64', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32',
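All of the truncated inductor outputs above share the same wrapper layout: each Triton kernel's source is handed to async_compile.triton as a string, async_compile.wait blocks until compilation finishes, and a call function allocates buffers and launches the kernels. Schematically (a sketch of the layout, not the real truncated contents):

kernel0 = async_compile.triton('''
import triton
import triton.language as tl

@triton.jit
def kernel0(in_ptr0, out_ptr0, xnumel, XBLOCK: tl.constexpr):
    ...  # pointwise body as in the kernels above
''')

async_compile.wait(globals())  # block until every kernel has compiled
del async_compile

def call(args):
    # allocate output buffers with empty_strided, launch each compiled
    # kernel with its grid and stream, and return the result tensors
    ...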