This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ TORCH_LOGS="output_code" python optim_repro.py
[WARNING]: Profiler function <class 'torch.autograd.profiler.record_function'> will be ignored
[DEBUG]: Output code:
# Preamble of the Inductor-generated output code for the '0_inference' AOT
# graph (captured via TORCH_LOGS="output_code"). Order preserved as emitted.
# AOT ID: ['0_inference']
from ctypes import c_void_p, c_long
import torch
import math
import random
import os
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
AlbertForMaskedLM
AlbertForQuestionAnswering
BartForCausalLM
BartForConditionalGeneration
BertForMaskedLM
BertForQuestionAnswering
BlenderbotSmallForCausalLM
BlenderbotSmallForConditionalGeneration
DebertaForMaskedLM
DebertaForQuestionAnswering
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Repro setup: compile with Inductor, capturing compiled regions into CUDA
# graphs, and use the default Python wrapper for generated code.
import time

import torch
import torch._dynamo as torchdynamo
import torch._inductor
import torch._inductor.config as config
from torch._dynamo.utils import cprofile_wrapper

# NOTE(review): apex (NVIDIA Apex) is a third-party dependency used for the
# fused-optimizer comparison — confirm it is installed in the repro env.
from apex.optimizers import FusedAdam, FusedSGD

# Reduce kernel-launch overhead by replaying compiled regions as CUDA graphs.
config.triton.cudagraphs = True
# Keep the Python wrapper (cpp_wrapper would emit a C++ call wrapper instead).
config.cpp_wrapper = False
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def init_state_per_param(self, param, param_group): | |
state = self.state[param] | |
if len(state) == 0: | |
# note(crcrpar): [special device hosting for step] | |
# Deliberately host `step` on CPU if both capturable and fused are off. | |
# This is because kernel launches are costly on CUDA and XLA. | |
state['step'] = ( | |
torch.zeros((), dtype=_get_scalar_dtype(is_fused=param_group['fused']), device=param.device) | |
if param_group['capturable'] or param_group['fused'] | |
else torch.tensor(0.0, dtype=_get_scalar_dtype()) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Repro setup (duplicate of the snippet above on the page): Inductor with
# CUDA graphs enabled and the Python (non-C++) code wrapper.
import time

import torch
import torch._dynamo as torchdynamo
import torch._inductor
import torch._inductor.config as config
from torch._dynamo.utils import cprofile_wrapper

# NOTE(review): apex (NVIDIA Apex) is a third-party dependency — verify it
# is available before running this repro.
from apex.optimizers import FusedAdam, FusedSGD

# Replay compiled regions via CUDA graphs to cut per-launch overhead.
config.triton.cudagraphs = True
# Use the default Python wrapper for Inductor's generated code.
config.cpp_wrapper = False
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Repro for torch._dynamo.decorators.mark_static_address with cudagraphs
# ("reduce-overhead" mode). Requires a CUDA device.
import torch
from torch._dynamo.decorators import mark_static_address


@torch.compile(mode="reduce-overhead")
def foo(inp):
    """Trivial compiled function: elementwise add of 1."""
    return inp + 1


inp = torch.rand([20, 20], device="cuda")
# Mark this tensor's storage address as static so the cudagraph can bake the
# address in; guard=False skips installing a guard on that address.
mark_static_address(inp, guard=False)
foo(inp)

# Rebind `inp` to a fresh tensor whose address was NOT marked static —
# presumably to exercise the non-static path on the next call; the snippet
# is truncated here on the page.
inp = torch.ones([20, 20], device="cuda")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Repro setup variant (no cprofile_wrapper import): Inductor with CUDA
# graphs enabled and the Python (non-C++) code wrapper.
import time

import torch
import torch._dynamo as torchdynamo
import torch._inductor
import torch._inductor.config as config

# NOTE(review): apex (NVIDIA Apex) is a third-party dependency — verify it
# is available before running this repro.
from apex.optimizers import FusedAdam, FusedSGD

# Replay compiled regions via CUDA graphs to cut per-launch overhead.
config.triton.cudagraphs = True
# Use the default Python wrapper for Inductor's generated code.
config.cpp_wrapper = False
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
torch.set_default_device("cuda") | |
import logging | |
logger = logging.getLogger(__name__) | |
@torch.compile() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[DEBUG]: MODIFIED BYTECODE precondition2 /opt/dlami/nvme/mlazos/test/a.py line 10
 10           0 LOAD_GLOBAL              8 (__compiled_fn_0)
              2 LOAD_FAST                1 (_masked_preconditioner_list)
              4 LOAD_CONST               8 (0)
              6 BINARY_SUBSCR
              8 LOAD_FAST                1 (_masked_preconditioner_list)
             10 LOAD_CONST               9 (1)
             12 BINARY_SUBSCR
             14 CALL_FUNCTION            2
             16 STORE_FAST               6 (graph_out_0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
python huggingface.py --training --float32 --performance --only MobileBertForQuestionAnswering --backend=inductor
NewerOlder