Skip to content

Instantly share code, notes, and snippets.

View anijain2305's full-sized avatar

Animesh Jain anijain2305

  • Meta Platforms
View GitHub Profile
WARNING: torch sampled_addmm does not support batch indices. Benchmarked by iterating over batches. Can be improved significantly
Swin Transformer
[----------------------------------------------------------- sddmm ------------------------------------------------------------]
| torch_dense | torch_sddmm | csr_sputnik | csr_ge | coo_ge | csr_to_coo
1 threads: ---------------------------------------------------------------------------------------------------------------------
B= 96, M=3136, K= 32, prob=0.0000 | 9532.0 | 47358.3 | 20816.9 | 106767.1 | 44071.4 | 259.6
B= 192, M= 784, K= 32, prob=0.7500 | 1168.4 | 9095.9 | 728.3 | 2942.2 | 1480.8 | 117.1
B= 384, M= 196, K= 32, prob=0.9375 | 171.0 | 17444.5 | 103.0 | 91.8 | 148.3 | 117.3
B= 768, M= 49, K= 32, prob=0.9844 | 78.6 | 34361.4 | 18.7 | 12.8 | 145.
import importlib
import gc
import os
import sys
import logging
import torch
from os.path import abspath
from os.path import exists
import itertools
(/scratch/anijain/work/env) anijain@a100-st-p4d24xlarge-58 /scratch/anijain/work/torchdynamo (wconstab/dynamic) $ python benchmarks/torchbench.py --dynamic_shapes --training --nvfuser --accuracy-aot-ts-mincut --devices cuda --repeat 1 -k hf_Bert
cuda train hf_Bert DEBUG Unable to use AOT Autograd because graph has mutation
DEBUG Unable to use AOT Autograd because graph has mutation
DEBUG Unable to use AOT Autograd because graph has mutation
DEBUG Unable to use AOT Autograd because graph has mutation
DEBUG Unable to use AOT Autograd because graph has mutation
DEBUG Unable to use AOT Autograd because graph has mutation
DEBUG Unable to use AOT Autograd because graph has mutation
DEBUG Unable to use AOT
set -x -e
cd /data/home/binbao/cluster/torchdynamo-update-pin/
git checkout main && git pull && git branch -D binbao/update_pytorch_pin && git checkout binbao/update_pytorch_pin && git rebase main
nightly=`date +'%Y%m%d'`
sed -i 's/PYTORCH_VERSION ?= dev.*/PYTORCH_VERSION ?= dev'$nightly'/' Makefile
sed -i 's/\.dev.*+/\.dev'$nightly'+/g'
git commit -a -m "Update PyTorch pin"
git push -f
import torch
from torch import tensor, device
import torch.fx as fx
from torchdynamo.testing import rand_strided
from math import inf
from torch.fx.experimental.proxy_tensor import make_fx
# torch version: 1.13.0a0+git071f875
import triton
import triton.language as tl
from torchinductor.ir import ReductionHint
from torchinductor.triton_ops.autotune import pointwise
from torchinductor.utils import instance_descriptor
@pointwise(size_hints=[4194304], filename=__file__, meta={'signature': {0: '*fp32', 1: '*i64', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32',
import triton
import triton.language as tl
from torchinductor.ir import ReductionHint
from torchinductor.triton_ops.autotune import pointwise
from torchinductor.utils import instance_descriptor
@pointwise(size_hints=[4194304], filename=__file__, meta={'signature': {0: '*fp32', 1: '*i64', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32',
This file has been truncated, but you can view the full file.
from ctypes import c_void_p, c_long
import torch
import random
from torch import empty_strided, as_strided, device
from torchinductor.codecache import AsyncCompile
aten = torch.ops.aten
async_compile = AsyncCompile()
import triton
import triton.language as tl
from torchinductor.ir import ReductionHint
from torchinductor.triton_ops.autotune import pointwise
from torchinductor.utils import instance_descriptor
@pointwise(size_hints=[4194304], filename=__file__, meta={'signature': {0: '*fp32', 1: '*i64', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32',
from ctypes import c_void_p, c_long
import torch
import random
from torch import empty_strided, as_strided, device
from torchinductor.codecache import AsyncCompile
aten = torch.ops.aten
async_compile = AsyncCompile()