TORCH_LOGS="recompiles" PYTORCH_TEST_WITH_DYNAMO=1 python test/test_optim.py -k test_foreach_matches_forloop_AdamW_cuda
[WARNING]:Profiler function <class 'torch.autograd.profiler.record_function'> will be ignored
[DEBUG]:Recompiling function wrapper in /data/users/mlazos/pytorch/torch/optim/optimizer.py:463
triggered by the following guard failure(s):
- ___check_obj_id(L['args'][0], 139797595855440)
[DEBUG]:Recompiling function step in /data/users/mlazos/pytorch/torch/optim/adamw.py:190
triggered by the following guard failure(s):
- ___check_obj_id(L['self'], 139797595855440)
[DEBUG]:Recompiling function step in /data/users/mlazos/pytorch/torch/optim/adamw.py:190
triggered by the following guard failure(s):
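The ___check_obj_id guard on L['self'] means the compiled step() is specialized on the AdamW instance itself, so every new optimizer object forces a recompile. A minimal standalone sketch of that behavior (my own repro, not taken from the test suite; shapes and names are made up):

import torch

params = [torch.nn.Parameter(torch.randn(4, 4, device="cuda"))]
for p in params:
    p.grad = torch.randn_like(p)

opt1 = torch.optim.AdamW(params)
opt2 = torch.optim.AdamW(params)

@torch.compile()
def compiled_step(opt):
    opt.step()

# With TORCH_LOGS="recompiles", the second call should fail the
# ___check_obj_id(L['self'], ...) guard and recompile adamw.step.
compiled_step(opt1)
compiled_step(opt2)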
--> TORCH_LOGS="dynamo" python nvembed.py
setup passages
A new version of the following files was downloaded from https://huggingface.co/nvidia/NV-Embed-v1:
- configuration_nvembed.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/nvidia/NV-Embed-v1:
- modeling_nvembed.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
Downloading shards: 100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 9.86it/s]
Loading checkpoint shards: 100%|████████████████████████████████████████████████████| 4/4 [00:03<00:00, 1.16it/s]
--> TORCH_LOGS="dynamo" python nvembed.py
setup passages
Loading checkpoint shards: 100%|███████████████████████████████████████████████████████████████| 4/4 [00:03<00:00, 1.07it/s]
downloaded model
[INFO]:Step 1: torchdynamo start tracing fn /data/users/mlazos/empathy_day/nvembed.py:31
[INFO]:Restarting analysis due to _dynamo/symbolic_convert.py:148 in fail_and_restart_analysis
[INFO]:Step 1: torchdynamo start tracing fn /data/users/mlazos/empathy_day/nvembed.py:31
[INFO]:produce_guards
[INFO]:Step 1: torchdynamo start tracing encode /home/mlazos/.cache/huggingface/modules/transformers_modules/nvidia/NV-Embed-v1/97aefcdd69565404f4a24de8ca4eb8114cb25ff0/modeling_nvembed.py:403
[INFO]:Restarting analysis due to _dynamo/symbolic_convert.py:148 in fail_and_restart_analysis
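The trace above comes from a small driver script; a rough reconstruction of what nvembed.py likely does (the placeholder passages, the wrapper name fn, and the exact arguments to encode are my assumptions, with encode itself coming from the downloaded modeling_nvembed.py):

import torch
from transformers import AutoModel

print("setup passages")
passages = ["example passage one", "example passage two"]  # placeholder inputs

model = AutoModel.from_pretrained("nvidia/NV-Embed-v1", trust_remote_code=True).to("cuda")
print("downloaded model")

@torch.compile()
def fn(texts):
    # encode() is defined by the model's remote code (modeling_nvembed.py:403 above)
    return model.encode(texts)

embeddings = fn(passages)
print(embeddings.shape)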
import torch
from torch.optim.lr_scheduler import LambdaLR, ChainedScheduler, ConstantLR, SequentialLR
from torch.testing._internal.common_utils import CudaMemoryLeakCheck, TestCase, run_tests
def chained_fn():
    with CudaMemoryLeakCheck(None, name="hi"):
        device = "cuda:0"
        dtype = torch.float32
        optim_cls = torch.optim.ASGD
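The snippet is cut off here; a self-contained sketch of the kind of repro chained_fn appears to set up (parameter shape, hyperparameters, and the step loop are my assumptions based on the imports above):

import torch
from torch.optim.lr_scheduler import ChainedScheduler, ConstantLR, LambdaLR

device, dtype = "cuda:0", torch.float32
param = torch.rand(2, 3, device=device, dtype=dtype, requires_grad=True)
opt = torch.optim.ASGD([param], lr=0.1)
sched = ChainedScheduler([
    ConstantLR(opt, factor=0.5, total_iters=2),
    LambdaLR(opt, lr_lambda=lambda epoch: 0.95 ** epoch),
])

@torch.compile()
def compiled_step():
    opt.step()

for _ in range(5):
    param.grad = torch.rand_like(param)
    compiled_step()
    sched.step()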
--> TORCH_LOGS="output_code" python optim_repro.py
[WARNING]:Profiler function <class 'torch.autograd.profiler.record_function'> will be ignored
[DEBUG]:Output code:
# AOT ID: ['0_inference']
from ctypes import c_void_p, c_long
import torch
import math
import random
import os
AlbertForMaskedLM
AlbertForQuestionAnswering
BartForCausalLM
BartForConditionalGeneration
BertForMaskedLM
BertForQuestionAnswering
BlenderbotSmallForCausalLM
BlenderbotSmallForConditionalGeneration
DebertaForMaskedLM
DebertaForQuestionAnswering
import torch
import torch._dynamo as torchdynamo
import torch._inductor
import time
import torch._inductor.config as config
from torch._dynamo.utils import cprofile_wrapper
from apex.optimizers import FusedAdam, FusedSGD
config.triton.cudagraphs = True
config.cpp_wrapper = False
def init_state_per_param(self, param, param_group):
    state = self.state[param]
    if len(state) == 0:
        # note(crcrpar): [special device hosting for step]
        # Deliberately host `step` on CPU if both capturable and fused are off.
        # This is because kernel launches are costly on CUDA and XLA.
        state['step'] = (
            torch.zeros((), dtype=_get_scalar_dtype(is_fused=param_group['fused']), device=param.device)
            if param_group['capturable'] or param_group['fused']
            else torch.tensor(0.0, dtype=_get_scalar_dtype())
        )
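A quick check of the behavior described in the note above (my own snippet, not part of the optimizer source): step is hosted on CPU on the default path and moves to the parameter's device once fused or capturable is set.

import torch

p = torch.nn.Parameter(torch.randn(4, device="cuda"))
p.grad = torch.randn_like(p)

opt = torch.optim.AdamW([p])              # capturable=False, fused=False
opt.step()
print(opt.state[p]["step"].device)        # cpu

opt_fused = torch.optim.AdamW([p], fused=True)
opt_fused.step()
print(opt_fused.state[p]["step"].device)  # cuda:0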
import torch
import torch._dynamo as torchdynamo
import torch._inductor
import time
import torch._inductor.config as config
from torch._dynamo.utils import cprofile_wrapper
from apex.optimizers import FusedAdam, FusedSGD
config.triton.cudagraphs = True
config.cpp_wrapper = False
from torch._dynamo.decorators import mark_static_address
@torch.compile(mode="reduce-overhead")
def foo(inp):
    return inp + 1
inp = torch.rand([20, 20], device="cuda")
mark_static_address(inp, guard=False)
foo(inp)
inp = torch.ones([20, 20], device="cuda")
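The file is cut off here; presumably the fresh tensor is passed back into foo. Since guard=False installed no guard on the first tensor's data_ptr, running under TORCH_LOGS="recompiles" is the way to check whether swapping in the new, unmarked input triggers a recompile.

foo(inp)  # presumed continuation; check TORCH_LOGS="recompiles" for guard failures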