Skip to content

Instantly share code, notes, and snippets.

@chauhang
Created March 27, 2024 03:10
Show Gist options
  • Save chauhang/5e24b532d3cbb0dd3dbc7e2401a17cd6 to your computer and use it in GitHub Desktop.
Save chauhang/5e24b532d3cbb0dd3dbc7e2401a17cd6 to your computer and use it in GitHub Desktop.
torchtrain 1b model checkpoint failure flight recorder dump
This file has been truncated, but you can view the full file.
{
"traceEvents": [
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500078064502,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 1,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500078064502,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 1,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500078064502,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 1,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500078064502,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 1,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500078064502,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 1,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500078064502,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 1,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500078064502,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 1,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500078064502,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 1,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500078064502,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 1,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500078064502,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 1,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500078064502,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 1,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500078064502,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 1,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500078064502,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 1,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500078064502,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 1,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500078064602,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500078064602,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500078064602,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500078064602,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500078064602,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500078064602,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500078064602,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500078064602,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500078064602,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500078064602,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500078064602,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500078064602,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500078064602,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500078064602,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500078203477,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 2,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500078203577,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082256013,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 3,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082256113,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082294375,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 4,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082294375,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 4,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082294375,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 4,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082294375,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 4,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082294375,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 4,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082294375,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 4,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082294375,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 4,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082294375,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 4,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082294475,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082294475,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082294475,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082294475,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082294475,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082294475,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082294475,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082294475,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082389172,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 5,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082389172,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 5,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082389172,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 5,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082389172,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 5,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082389172,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 5,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082389172,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 5,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082389172,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 5,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082389172,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 5,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082389272,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082389272,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082389272,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082389272,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082389272,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082389272,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082389272,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082389272,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082489295,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 6,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500082489295,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 6,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082489295,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 6,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082489295,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 6,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082489295,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 6,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082489295,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 6,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082489395,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082489395,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082489395,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082489395,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500082489395,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082489395,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082493851,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 7,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500082493851,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 7,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082493851,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 7,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082493851,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 7,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082493851,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 7,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082493851,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 7,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082493951,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082493951,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082493951,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082493951,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500082493951,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082493951,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082640009,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 8,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500082640009,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 8,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082640009,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 8,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082640009,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 8,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082640009,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 8,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082640009,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 8,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082640109,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082640109,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082640109,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082640109,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500082640109,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082640109,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500082664881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 9,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082664881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 9,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500082664881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 9,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082664881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 9,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500082664881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 9,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500082664881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 9,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082664881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 9,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082664881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 9,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082664981,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082664981,
"pid": 0,
"tid": 4
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500082664981,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500082664981,
"pid": 0,
"tid": 4
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082664981,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500082664981,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082664981,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500082664981,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500082733976,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 10,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082733976,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 10,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500082733976,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 10,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082733976,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 10,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500082733976,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 10,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500082733976,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 10,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500082733976,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 10,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500082734076,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500082734076,
"pid": 0,
"tid": 4
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500082734076,
"pid": 0,
"tid": 4
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082734076,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500082734076,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082734076,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500082734076,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500082737123,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 11,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082737123,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 11,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500082737123,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 11,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082737123,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 11,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500082737123,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 11,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500082737123,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 11,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500082737123,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 11,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500082737223,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500082737223,
"pid": 0,
"tid": 4
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500082737223,
"pid": 0,
"tid": 4
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082737223,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500082737223,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082737223,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500082737223,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500082738706,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 12,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
1
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082738706,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 12,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
1
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500082738706,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 12,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
1
]
]
}
},
{
"name": "main:378",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082738706,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 12,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
1
]
]
}
},
{
"name": "set_pg_timeouts:50",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500082738706,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 12,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
1
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082738706,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 12,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
1
]
]
}
},
{
"name": "barrier:3686",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082738706,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 12,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
1
]
]
}
},
{
"name": "barrier:3686",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082738806,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082738806,
"pid": 0,
"tid": 4
},
{
"name": "set_pg_timeouts:50",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500082738806,
"pid": 0,
"tid": 4
},
{
"name": "main:378",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082738806,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500082738806,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082738806,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500082738806,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082778689,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 13,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082778689,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 13,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500082778689,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 13,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082778689,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 13,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082778689,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 13,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082778689,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 13,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500082778689,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 13,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082778689,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 13,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082778689,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 13,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082778689,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 13,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082778689,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 13,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082778689,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 13,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082778689,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 13,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082778689,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 13,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082778789,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082778789,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082778789,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082778789,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082778789,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082778789,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082778789,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500082778789,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082778789,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082778789,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082778789,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500082778789,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082778789,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082778789,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082779699,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 14,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082779799,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082782360,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 15,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082782460,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082799888,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 16,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082799888,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 16,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082799888,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 16,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082799888,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 16,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082799888,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 16,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082799888,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 16,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082799888,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 16,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082799888,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 16,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082799988,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082799988,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082799988,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082799988,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082799988,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082799988,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082799988,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082799988,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082801573,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 17,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082801573,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 17,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082801573,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 17,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082801573,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 17,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082801573,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 17,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082801573,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 17,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082801573,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 17,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082801573,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 17,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082801673,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082801673,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082801673,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082801673,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082801673,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082801673,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082801673,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082801673,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082809160,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 18,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500082809160,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 18,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082809160,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 18,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082809160,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 18,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082809160,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 18,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082809160,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 18,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082809260,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082809260,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082809260,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082809260,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500082809260,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082809260,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082814429,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 19,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500082814429,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 19,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082814429,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 19,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082814429,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 19,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082814429,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 19,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082814429,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 19,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082814529,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082814529,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082814529,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082814529,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500082814529,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082814529,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082890579,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 20,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500082890579,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 20,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082890579,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 20,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082890579,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 20,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082890579,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 20,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082890579,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 20,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082890679,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082890679,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082890679,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082890679,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500082890679,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082890679,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500082891745,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 21,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082891745,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 21,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500082891745,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 21,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082891745,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 21,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500082891745,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 21,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500082891745,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 21,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082891745,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 21,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082891745,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 21,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082891845,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082891845,
"pid": 0,
"tid": 4
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500082891845,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500082891845,
"pid": 0,
"tid": 4
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082891845,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500082891845,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082891845,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500082891845,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500082892957,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 22,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082892957,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 22,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500082892957,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 22,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082892957,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 22,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500082892957,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 22,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500082892957,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 22,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500082892957,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 22,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500082893057,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500082893057,
"pid": 0,
"tid": 4
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500082893057,
"pid": 0,
"tid": 4
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082893057,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500082893057,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082893057,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500082893057,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500082893451,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 23,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082893451,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 23,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500082893451,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 23,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082893451,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 23,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500082893451,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 23,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500082893451,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 23,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500082893451,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 23,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500082893551,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500082893551,
"pid": 0,
"tid": 4
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500082893551,
"pid": 0,
"tid": 4
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082893551,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500082893551,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082893551,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500082893551,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082935568,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 24,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082935568,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 24,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500082935568,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 24,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082935568,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 24,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082935568,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 24,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082935568,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 24,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500082935568,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 24,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082935568,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 24,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082935568,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 24,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082935568,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 24,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082935568,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 24,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082935568,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 24,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082935568,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 24,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082935568,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 24,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082935668,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082935668,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082935668,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082935668,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082935668,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082935668,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082935668,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500082935668,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082935668,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082935668,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082935668,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500082935668,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082935668,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082935668,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082936595,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 25,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082936695,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082938982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 26,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082939082,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082953189,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 27,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082953189,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 27,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082953189,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 27,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082953189,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 27,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082953189,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 27,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082953189,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 27,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082953189,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 27,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082953189,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 27,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082953289,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082953289,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082953289,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082953289,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082953289,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082953289,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082953289,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082953289,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082954684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 28,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082954684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 28,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082954684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 28,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082954684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 28,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082954684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 28,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500082954684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 28,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082954684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 28,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082954684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 28,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082954784,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082954784,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082954784,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500082954784,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082954784,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082954784,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082954784,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082954784,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082961821,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 29,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500082961821,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 29,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082961821,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 29,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082961821,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 29,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082961821,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 29,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082961821,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 29,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082961921,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082961921,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082961921,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082961921,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500082961921,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082961921,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500082967042,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 30,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500082967042,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 30,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082967042,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 30,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500082967042,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 30,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500082967042,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 30,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500082967042,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 30,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500082967142,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500082967142,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082967142,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500082967142,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500082967142,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500082967142,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083044539,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 31,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083044539,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 31,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083044539,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 31,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083044539,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 31,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083044539,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 31,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083044539,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 31,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083044639,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083044639,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083044639,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083044639,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083044639,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083044639,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083046379,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 32,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083046379,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 32,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083046379,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 32,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083046379,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 32,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083046379,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 32,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083046379,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 32,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083046379,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 32,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083046379,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 32,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083046479,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083046479,
"pid": 0,
"tid": 4
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083046479,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083046479,
"pid": 0,
"tid": 4
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083046479,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083046479,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083046479,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083046479,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083047546,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 33,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083047546,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 33,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083047546,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 33,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083047546,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 33,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500083047546,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 33,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500083047546,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 33,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500083047546,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 33,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500083047646,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500083047646,
"pid": 0,
"tid": 4
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500083047646,
"pid": 0,
"tid": 4
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083047646,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083047646,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083047646,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083047646,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083047942,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 34,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083047942,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 34,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083047942,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 34,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083047942,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 34,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500083047942,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 34,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500083047942,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 34,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500083047942,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 34,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500083048042,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500083048042,
"pid": 0,
"tid": 4
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500083048042,
"pid": 0,
"tid": 4
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083048042,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083048042,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083048042,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083048042,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083085658,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 35,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083085658,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 35,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083085658,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 35,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083085658,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 35,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083085658,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 35,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083085658,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 35,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083085658,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 35,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083085658,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 35,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083085658,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 35,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083085658,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 35,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083085658,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 35,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083085658,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 35,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083085658,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 35,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083085658,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 35,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083085758,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083085758,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083085758,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083085758,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083085758,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083085758,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083085758,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083085758,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083085758,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083085758,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083085758,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083085758,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083085758,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083085758,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083086582,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 36,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083086682,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083089075,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 37,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083089175,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083106227,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 38,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083106227,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 38,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083106227,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 38,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083106227,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 38,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083106227,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 38,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083106227,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 38,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083106227,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 38,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083106227,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 38,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083106327,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083106327,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083106327,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083106327,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083106327,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083106327,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083106327,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083106327,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083107735,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 39,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083107735,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 39,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083107735,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 39,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083107735,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 39,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083107735,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 39,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083107735,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 39,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083107735,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 39,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083107735,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 39,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083107835,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083107835,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083107835,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083107835,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083107835,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083107835,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083107835,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083107835,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083114977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 40,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083114977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 40,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083114977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 40,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083114977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 40,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083114977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 40,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083114977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 40,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083115077,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083115077,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083115077,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083115077,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083115077,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083115077,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083120272,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 41,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083120272,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 41,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083120272,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 41,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083120272,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 41,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083120272,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 41,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083120272,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 41,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083120372,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083120372,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083120372,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083120372,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083120372,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083120372,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083198088,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 42,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083198088,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 42,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083198088,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 42,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083198088,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 42,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083198088,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 42,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083198088,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 42,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083198188,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083198188,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083198188,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083198188,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083198188,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083198188,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083199492,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 43,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083199492,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 43,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083199492,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 43,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083199492,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 43,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083199492,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 43,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083199492,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 43,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083199492,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 43,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083199492,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 43,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083199592,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083199592,
"pid": 0,
"tid": 4
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083199592,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083199592,
"pid": 0,
"tid": 4
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083199592,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083199592,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083199592,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083199592,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083200482,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 44,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083200482,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 44,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083200482,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 44,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083200482,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 44,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500083200482,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 44,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500083200482,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 44,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500083200482,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 44,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500083200582,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500083200582,
"pid": 0,
"tid": 4
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500083200582,
"pid": 0,
"tid": 4
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083200582,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083200582,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083200582,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083200582,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083200787,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 45,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083200787,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 45,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083200787,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 45,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083200787,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 45,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500083200787,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 45,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500083200787,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 45,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500083200787,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 45,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500083200887,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500083200887,
"pid": 0,
"tid": 4
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500083200887,
"pid": 0,
"tid": 4
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083200887,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083200887,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083200887,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083200887,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083239020,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 46,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083239020,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 46,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083239020,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 46,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083239020,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 46,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083239020,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 46,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083239020,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 46,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083239020,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 46,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083239020,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 46,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083239020,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 46,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083239020,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 46,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083239020,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 46,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083239020,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 46,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083239020,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 46,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083239020,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 46,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083239120,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083239120,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083239120,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083239120,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083239120,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083239120,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083239120,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083239120,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083239120,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083239120,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083239120,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083239120,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083239120,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083239120,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083239881,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 47,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083239981,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083242249,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 48,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083242349,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083257684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 49,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083257684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 49,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083257684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 49,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083257684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 49,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083257684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 49,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083257684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 49,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083257684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 49,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083257684,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 49,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083257784,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083257784,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083257784,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083257784,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083257784,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083257784,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083257784,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083257784,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083259603,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 50,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083259603,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 50,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083259603,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 50,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083259603,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 50,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083259603,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 50,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083259603,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 50,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083259603,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 50,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083259603,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 50,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083259703,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083259703,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083259703,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083259703,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083259703,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083259703,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083259703,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083259703,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083267425,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 51,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083267425,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 51,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083267425,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 51,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083267425,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 51,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083267425,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 51,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083267425,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 51,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083267525,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083267525,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083267525,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083267525,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083267525,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083267525,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083272843,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 52,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083272843,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 52,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083272843,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 52,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083272843,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 52,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083272843,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 52,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083272843,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 52,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083272943,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083272943,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083272943,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083272943,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083272943,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083272943,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083349531,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 53,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083349531,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 53,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083349531,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 53,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083349531,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 53,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083349531,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 53,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083349531,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 53,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083349631,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083349631,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083349631,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083349631,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083349631,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083349631,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083350982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 54,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083350982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 54,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083350982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 54,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083350982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 54,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083350982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 54,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083350982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 54,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083350982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 54,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083350982,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 54,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083351082,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083351082,
"pid": 0,
"tid": 4
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083351082,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083351082,
"pid": 0,
"tid": 4
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083351082,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083351082,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083351082,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083351082,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083352055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 55,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083352055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 55,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083352055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 55,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083352055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 55,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500083352055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 55,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500083352055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 55,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500083352055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 55,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500083352155,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500083352155,
"pid": 0,
"tid": 4
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500083352155,
"pid": 0,
"tid": 4
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083352155,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083352155,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083352155,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083352155,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083352641,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 56,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083352641,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 56,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083352641,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 56,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083352641,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 56,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500083352641,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 56,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500083352641,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 56,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500083352641,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 56,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500083352741,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500083352741,
"pid": 0,
"tid": 4
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500083352741,
"pid": 0,
"tid": 4
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083352741,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083352741,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083352741,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083352741,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083391673,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 57,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083391673,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 57,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083391673,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 57,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083391673,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 57,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083391673,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 57,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083391673,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 57,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083391673,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 57,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083391673,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 57,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083391673,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 57,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083391673,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 57,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083391673,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 57,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083391673,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 57,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083391673,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 57,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083391673,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 57,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083391773,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083391773,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083391773,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083391773,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083391773,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083391773,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083391773,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083391773,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083391773,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083391773,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083391773,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083391773,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083391773,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083391773,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083392892,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 58,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083392992,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083395644,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 59,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083395744,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083412871,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 60,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083412871,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 60,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083412871,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 60,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083412871,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 60,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083412871,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 60,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083412871,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 60,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083412871,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 60,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083412871,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 60,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083412971,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083412971,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083412971,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083412971,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083412971,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083412971,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083412971,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083412971,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083414493,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 61,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083414493,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 61,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083414493,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 61,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083414493,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 61,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083414493,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 61,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083414493,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 61,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083414493,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 61,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083414493,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 61,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083414593,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083414593,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083414593,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083414593,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083414593,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083414593,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083414593,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083414593,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083422083,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 62,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083422083,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 62,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083422083,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 62,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083422083,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 62,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083422083,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 62,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083422083,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 62,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083422183,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083422183,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083422183,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083422183,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083422183,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083422183,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083427330,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 63,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083427330,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 63,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083427330,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 63,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083427330,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 63,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083427330,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 63,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083427330,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 63,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083427430,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083427430,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083427430,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083427430,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083427430,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083427430,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083503789,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 64,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083503789,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 64,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083503789,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 64,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083503789,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 64,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083503789,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 64,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083503789,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 64,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083503889,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083503889,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083503889,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083503889,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083503889,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083503889,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083504885,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 65,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083504885,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 65,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083504885,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 65,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083504885,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 65,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083504885,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 65,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083504885,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 65,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083504885,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 65,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083504885,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 65,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083504985,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083504985,
"pid": 0,
"tid": 4
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083504985,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083504985,
"pid": 0,
"tid": 4
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083504985,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083504985,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083504985,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083504985,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083506006,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 66,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083506006,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 66,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083506006,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 66,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083506006,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 66,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500083506006,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 66,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500083506006,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 66,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500083506006,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 66,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500083506106,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500083506106,
"pid": 0,
"tid": 4
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500083506106,
"pid": 0,
"tid": 4
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083506106,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083506106,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083506106,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083506106,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083506851,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 67,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083506851,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 67,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083506851,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 67,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083506851,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 67,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500083506851,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 67,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500083506851,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 67,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500083506851,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 67,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500083506951,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500083506951,
"pid": 0,
"tid": 4
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500083506951,
"pid": 0,
"tid": 4
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083506951,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083506951,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083506951,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083506951,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083547876,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 68,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083547876,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 68,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083547876,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 68,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083547876,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 68,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083547876,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 68,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083547876,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 68,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083547876,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 68,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083547876,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 68,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083547876,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 68,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083547876,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 68,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083547876,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 68,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083547876,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 68,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083547876,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 68,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083547876,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 68,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083547976,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083547976,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083547976,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083547976,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083547976,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083547976,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083547976,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083547976,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083547976,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083547976,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083547976,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083547976,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083547976,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083547976,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083549195,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 69,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083549295,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083551778,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 70,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083551878,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083568055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 71,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083568055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 71,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083568055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 71,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083568055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 71,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083568055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 71,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083568055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 71,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083568055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 71,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083568055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 71,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083568155,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083568155,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083568155,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083568155,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083568155,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083568155,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083568155,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083568155,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083569968,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 72,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083569968,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 72,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083569968,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 72,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083569968,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 72,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083569968,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 72,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083569968,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 72,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083569968,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 72,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083569968,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 72,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083570068,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083570068,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083570068,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083570068,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083570068,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083570068,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083570068,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083570068,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083577735,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 73,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083577735,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 73,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083577735,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 73,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083577735,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 73,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083577735,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 73,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083577735,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 73,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083577835,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083577835,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083577835,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083577835,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083577835,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083577835,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083583169,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 74,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083583169,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 74,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083583169,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 74,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083583169,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 74,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083583169,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 74,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083583169,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 74,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083583269,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083583269,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083583269,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083583269,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083583269,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083583269,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083660055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 75,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083660055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 75,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083660055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 75,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083660055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 75,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083660055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 75,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083660055,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 75,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083660155,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083660155,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083660155,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083660155,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083660155,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083660155,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083662077,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 76,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083662077,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 76,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083662077,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 76,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083662077,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 76,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083662077,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 76,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083662077,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 76,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083662077,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 76,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083662077,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 76,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083662177,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083662177,
"pid": 0,
"tid": 4
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083662177,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083662177,
"pid": 0,
"tid": 4
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083662177,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083662177,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083662177,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083662177,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083663697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 77,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083663697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 77,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083663697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 77,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083663697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 77,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500083663697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 77,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500083663697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 77,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500083663697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 77,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500083663797,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500083663797,
"pid": 0,
"tid": 4
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500083663797,
"pid": 0,
"tid": 4
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083663797,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083663797,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083663797,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083663797,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083664148,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 78,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083664148,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 78,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083664148,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 78,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083664148,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 78,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500083664148,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 78,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500083664148,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 78,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500083664148,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 78,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500083664248,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500083664248,
"pid": 0,
"tid": 4
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500083664248,
"pid": 0,
"tid": 4
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083664248,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083664248,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083664248,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083664248,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083705557,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 79,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083705557,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 79,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083705557,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 79,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083705557,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 79,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083705557,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 79,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083705557,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 79,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083705557,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 79,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083705557,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 79,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083705557,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 79,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083705557,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 79,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083705557,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 79,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083705557,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 79,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083705557,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 79,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083705557,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 79,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083705657,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083705657,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083705657,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083705657,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083705657,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083705657,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083705657,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083705657,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083705657,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083705657,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083705657,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083705657,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083705657,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083705657,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083706528,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 80,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083706628,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083709190,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 81,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083709290,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083724935,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 82,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083724935,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 82,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083724935,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 82,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083724935,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 82,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083724935,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 82,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083724935,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 82,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083724935,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 82,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083724935,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 82,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083725035,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083725035,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083725035,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083725035,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083725035,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083725035,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083725035,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083725035,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083726590,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 83,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083726590,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 83,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083726590,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 83,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083726590,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 83,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083726590,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 83,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083726590,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 83,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083726590,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 83,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083726590,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 83,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083726690,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083726690,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083726690,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083726690,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083726690,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083726690,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083726690,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083726690,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083734146,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 84,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083734146,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 84,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083734146,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 84,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083734146,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 84,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083734146,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 84,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083734146,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 84,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083734246,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083734246,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083734246,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083734246,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083734246,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083734246,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083739418,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 85,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083739418,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 85,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083739418,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 85,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083739418,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 85,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083739418,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 85,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083739418,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 85,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083739518,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083739518,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083739518,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083739518,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083739518,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083739518,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083815755,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 86,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083815755,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 86,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083815755,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 86,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083815755,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 86,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083815755,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 86,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083815755,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 86,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083815855,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083815855,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083815855,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083815855,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083815855,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083815855,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083816994,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 87,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083816994,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 87,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083816994,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 87,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083816994,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 87,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083816994,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 87,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083816994,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 87,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083816994,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 87,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083816994,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 87,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083817094,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083817094,
"pid": 0,
"tid": 4
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083817094,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083817094,
"pid": 0,
"tid": 4
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083817094,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083817094,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083817094,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083817094,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083818575,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 88,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083818575,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 88,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083818575,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 88,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083818575,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 88,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500083818575,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 88,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500083818575,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 88,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500083818575,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 88,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500083818675,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500083818675,
"pid": 0,
"tid": 4
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500083818675,
"pid": 0,
"tid": 4
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083818675,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083818675,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083818675,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083818675,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083819137,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 89,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083819137,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 89,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083819137,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 89,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083819137,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 89,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500083819137,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 89,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500083819137,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 89,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500083819137,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 89,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500083819237,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500083819237,
"pid": 0,
"tid": 4
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500083819237,
"pid": 0,
"tid": 4
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083819237,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083819237,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083819237,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083819237,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083858463,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 90,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083858463,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 90,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083858463,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 90,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083858463,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 90,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083858463,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 90,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083858463,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 90,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083858463,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 90,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083858463,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 90,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083858463,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 90,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083858463,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 90,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083858463,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 90,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083858463,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 90,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083858463,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 90,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083858463,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 90,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083858563,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083858563,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083858563,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083858563,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083858563,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083858563,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083858563,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083858563,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083858563,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083858563,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083858563,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083858563,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083858563,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083858563,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083859326,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 91,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083859426,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083861886,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 92,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083861986,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083896166,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 93,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083896166,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 93,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083896166,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 93,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083896166,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 93,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083896166,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 93,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083896166,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 93,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083896166,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 93,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083896166,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 93,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083896266,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083896266,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083896266,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083896266,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083896266,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083896266,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083896266,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083896266,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083897883,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 94,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083897883,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 94,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083897883,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 94,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083897883,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 94,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083897883,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 94,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500083897883,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 94,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083897883,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 94,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083897883,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 94,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083897983,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083897983,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083897983,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500083897983,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083897983,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083897983,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083897983,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083897983,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083905743,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 95,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083905743,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 95,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083905743,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 95,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083905743,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 95,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083905743,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 95,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083905743,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 95,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083905843,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083905843,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083905843,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083905843,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083905843,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083905843,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083911470,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 96,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083911470,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 96,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083911470,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 96,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083911470,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 96,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083911470,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 96,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083911470,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 96,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083911570,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083911570,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083911570,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083911570,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083911570,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083911570,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500083988163,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 97,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083988163,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 97,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083988163,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 97,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500083988163,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 97,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083988163,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 97,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083988163,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 97,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083988263,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083988263,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083988263,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500083988263,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083988263,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500083988263,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083989782,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 98,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083989782,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 98,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083989782,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 98,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083989782,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 98,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500083989782,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 98,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500083989782,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 98,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500083989782,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 98,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500083989782,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 98,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500083989882,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500083989882,
"pid": 0,
"tid": 4
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500083989882,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500083989882,
"pid": 0,
"tid": 4
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083989882,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083989882,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083989882,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083989882,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083991199,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 99,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083991199,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 99,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083991199,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 99,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083991199,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 99,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500083991199,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 99,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500083991199,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 99,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500083991199,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 99,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500083991299,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500083991299,
"pid": 0,
"tid": 4
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500083991299,
"pid": 0,
"tid": 4
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083991299,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083991299,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083991299,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083991299,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500083991641,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 100,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083991641,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 100,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500083991641,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 100,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500083991641,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 100,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500083991641,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 100,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500083991641,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 100,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500083991641,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 100,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500083991741,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500083991741,
"pid": 0,
"tid": 4
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500083991741,
"pid": 0,
"tid": 4
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083991741,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500083991741,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500083991741,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500083991741,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500084033448,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 101,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084033448,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 101,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084033448,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 101,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084033448,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 101,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084033448,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 101,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084033448,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 101,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084033448,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 101,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084033448,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 101,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084033448,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 101,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084033448,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 101,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084033448,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 101,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084033448,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 101,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084033448,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 101,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084033448,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 101,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084033548,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084033548,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084033548,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084033548,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084033548,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084033548,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084033548,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084033548,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084033548,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084033548,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084033548,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084033548,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084033548,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500084033548,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084034638,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 102,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500084034738,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084037687,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 103,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward_unshard:415",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "_pre_forward:380",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "forward:843",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "forward:509",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "forward:857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "_call_impl:1536",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "_wrapped_call_impl:1527",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "main:265",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500084037787,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500084052081,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 104,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084052081,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 104,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084052081,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 104,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084052081,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 104,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084052081,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 104,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084052081,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 104,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084052081,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 104,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084052081,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 104,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084052181,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084052181,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084052181,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084052181,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084052181,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084052181,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084052181,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500084052181,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500084054321,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 105,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084054321,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 105,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084054321,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 105,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084054321,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 105,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084054321,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 105,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084054321,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 105,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084054321,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 105,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084054321,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 105,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084054421,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084054421,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084054421,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084054421,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084054421,
"pid": 0,
"tid": 4
},
{
"name": "_prefetch_handle:1216",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084054421,
"pid": 0,
"tid": 4
},
{
"name": "_pre_backward_hook:676",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084054421,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500084054421,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500084063727,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 106,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084063727,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 106,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084063727,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 106,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084063727,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 106,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084063727,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 106,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084063727,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 106,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084063827,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084063827,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084063827,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084063827,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500084063827,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500084063827,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500084069651,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 107,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084069651,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 107,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084069651,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 107,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084069651,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 107,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084069651,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 107,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084069651,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 107,
"state": "completed",
"input_sizes": [
[
852480
]
],
"output_sizes": [
[
106560
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084069751,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084069751,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084069751,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084069751,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500084069751,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500084069751,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "B",
"ts": 1711500084144410,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 108,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084144410,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 108,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084144410,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 108,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084144410,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 108,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084144410,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 108,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084144410,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 108,
"state": "completed",
"input_sizes": [
[
16384256
]
],
"output_sizes": [
[
2048032
]
]
}
},
{
"name": "reduce_scatter_tensor:3362",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084144510,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084144510,
"pid": 0,
"tid": 4
},
{
"name": "_reduce_grad:841",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084144510,
"pid": 0,
"tid": 4
},
{
"name": "_post_backward_hook:749",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084144510,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500084144510,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_reduce_scatter_base",
"cat": "completed",
"ph": "E",
"ts": 1711500084144510,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500084145965,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 109,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084145965,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 109,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084145965,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 109,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084145965,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 109,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084145965,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 109,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084145965,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 109,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084145965,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 109,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084145965,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 109,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:2222",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084146065,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084146065,
"pid": 0,
"tid": 4
},
{
"name": "clip_grad_norm_:1169",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084146065,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500084146065,
"pid": 0,
"tid": 4
},
{
"name": "main:276",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084146065,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084146065,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084146065,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500084146065,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500084147289,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 110,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084147289,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 110,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084147289,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 110,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084147289,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 110,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500084147289,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 110,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500084147289,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 110,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500084147289,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 110,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500084147389,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500084147389,
"pid": 0,
"tid": 4
},
{
"name": "dist_mean:23",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500084147389,
"pid": 0,
"tid": 4
},
{
"name": "main:303",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084147389,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084147389,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084147389,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500084147389,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "B",
"ts": 1711500084147718,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 111,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084147718,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 111,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084147718,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 111,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084147718,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 111,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "B",
"ts": 1711500084147718,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 111,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "B",
"ts": 1711500084147718,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 111,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "B",
"ts": 1711500084147718,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 111,
"state": "completed",
"input_sizes": [
[]
],
"output_sizes": [
[]
]
}
},
{
"name": "__call__:860",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py",
"ph": "E",
"ts": 1711500084147818,
"pid": 0,
"tid": 4
},
{
"name": "all_reduce:188",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py",
"ph": "E",
"ts": 1711500084147818,
"pid": 0,
"tid": 4
},
{
"name": "dist_max:18",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py",
"ph": "E",
"ts": 1711500084147818,
"pid": 0,
"tid": 4
},
{
"name": "main:304",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084147818,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084147818,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084147818,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_reduce",
"cat": "completed",
"ph": "E",
"ts": 1711500084147818,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "state_dict:33",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "get_model_state_dict:652",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_get_model_state_dict:333",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "state_dict:1907",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_pre_state_dict_hook:786",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_sharded_pre_state_dict_hook:531",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_common_unshard_pre_state_dict_hook:174",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_enter_unshard_params_ctx:138",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "__enter__:137",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/contextlib.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_unshard_fsdp_state_params:196",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_unshard_param_utils.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084150697,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 112,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "_unshard_fsdp_state_params:196",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_unshard_param_utils.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "__enter__:137",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/contextlib.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "_enter_unshard_params_ctx:138",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "_common_unshard_pre_state_dict_hook:174",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "_sharded_pre_state_dict_hook:531",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "_pre_state_dict_hook:786",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:1907",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "_get_model_state_dict:333",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "get_model_state_dict:652",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:33",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500084150797,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "state_dict:33",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "get_model_state_dict:652",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_get_model_state_dict:333",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "state_dict:1911",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "state_dict:1911",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "state_dict:1911",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "state_dict:1907",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_state_dict_hook:786",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_sharded_pre_state_dict_hook:531",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_common_unshard_pre_state_dict_hook:174",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_enter_unshard_params_ctx:138",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "__enter__:137",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/contextlib.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard_fsdp_state_params:196",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_unshard_param_utils.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084151602,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 113,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "_unshard_fsdp_state_params:196",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_unshard_param_utils.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "__enter__:137",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/contextlib.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "_enter_unshard_params_ctx:138",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "_common_unshard_pre_state_dict_hook:174",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "_sharded_pre_state_dict_hook:531",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "_pre_state_dict_hook:786",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:1907",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:1911",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:1911",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:1911",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "_get_model_state_dict:333",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "get_model_state_dict:652",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:33",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500084151702,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "state_dict:33",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "get_model_state_dict:652",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_get_model_state_dict:333",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "state_dict:1911",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "state_dict:1911",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "state_dict:1911",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "state_dict:1907",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_pre_state_dict_hook:786",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_sharded_pre_state_dict_hook:531",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_common_unshard_pre_state_dict_hook:174",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_enter_unshard_params_ctx:138",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "__enter__:137",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/contextlib.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard_fsdp_state_params:196",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_unshard_param_utils.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084155202,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 114,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "_all_gather_flat_param:1399",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "unshard:1308",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "_unshard:299",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "_unshard_fsdp_state_params:196",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_unshard_param_utils.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "__enter__:137",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/contextlib.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "_enter_unshard_params_ctx:138",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "_common_unshard_pre_state_dict_hook:174",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "_sharded_pre_state_dict_hook:531",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "_pre_state_dict_hook:786",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:1907",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:1911",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:1911",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:1911",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "_get_model_state_dict:333",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "get_model_state_dict:652",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:33",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500084155302,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_gather",
"cat": "completed",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_optim_state_dict:1956",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_map_param_key_to_optim_keys:1232",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "all_gather_object:2439",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "all_gather:2857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084246852,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 115,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "all_gather:2857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "all_gather_object:2439",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "_map_param_key_to_optim_keys:1232",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict:1956",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_gather",
"cat": "completed",
"ph": "E",
"ts": 1711500084246952,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_gather",
"cat": "completed",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "_optim_state_dict:1956",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "_map_param_key_to_optim_keys:1232",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "all_gather_object:2451",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "all_gather:2857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084292341,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 116,
"state": "completed",
"input_sizes": [
[
355
]
],
"output_sizes": [
[
8,
355
]
]
}
},
{
"name": "all_gather:2857",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "all_gather_object:2451",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "_map_param_key_to_optim_keys:1232",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict:1956",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_gather",
"cat": "completed",
"ph": "E",
"ts": 1711500084292441,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_gather",
"cat": "completed",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_optim_state_dict:1971",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_convert_state_with_orig_params:1794",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_gather_all_orig_param_state:1687",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_allgather_state_info:1334",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "all_gather_object:2439",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "all_gather:2859",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084297977,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 117,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "all_gather:2859",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "all_gather_object:2439",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "_allgather_state_info:1334",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "_gather_all_orig_param_state:1687",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "_convert_state_with_orig_params:1794",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict:1971",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_gather",
"cat": "completed",
"ph": "E",
"ts": 1711500084298077,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_gather",
"cat": "completed",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "_optim_state_dict:1971",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "_convert_state_with_orig_params:1794",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "_gather_all_orig_param_state:1687",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "_allgather_state_info:1334",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "all_gather_object:2451",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "all_gather:2859",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084298626,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 118,
"state": "completed",
"input_sizes": [
[
1048
]
],
"output_sizes": [
[
8,
1048
]
]
}
},
{
"name": "all_gather:2859",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "all_gather_object:2451",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "_allgather_state_info:1334",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "_gather_all_orig_param_state:1687",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "_convert_state_with_orig_params:1794",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict:1971",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_gather",
"cat": "completed",
"ph": "E",
"ts": 1711500084298726,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_optim_state_dict:1971",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_convert_state_with_orig_params:1794",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_gather_all_orig_param_state:1688",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_allgather_orig_param_states:1634",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084301177,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 119,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "_allgather_orig_param_states:1634",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "_gather_all_orig_param_state:1688",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "_convert_state_with_orig_params:1794",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict:1971",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500084301277,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_optim_state_dict:1971",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_convert_state_with_orig_params:1794",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_gather_all_orig_param_state:1688",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "_allgather_orig_param_states:1634",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084303139,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 120,
"state": "completed",
"input_sizes": [
[
2048032
]
],
"output_sizes": [
[
16384256
]
]
}
},
{
"name": "all_gather_into_tensor:2951",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "_allgather_orig_param_states:1634",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "_gather_all_orig_param_state:1688",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "_convert_state_with_orig_params:1794",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict:1971",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "E",
"ts": 1711500084303239,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_gather",
"cat": "completed",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_optim_state_dict:1971",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_convert_state_with_orig_params:1794",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_gather_all_orig_param_state:1687",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "_allgather_state_info:1334",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "all_gather_object:2439",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "all_gather:2859",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084305279,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 121,
"state": "completed",
"input_sizes": [
[
1
]
],
"output_sizes": [
[
8,
1
]
]
}
},
{
"name": "all_gather:2859",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "all_gather_object:2439",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "_allgather_state_info:1334",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "_gather_all_orig_param_state:1687",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "_convert_state_with_orig_params:1794",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict:1971",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_gather",
"cat": "completed",
"ph": "E",
"ts": 1711500084305379,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_gather",
"cat": "completed",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "_optim_state_dict:1971",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "_convert_state_with_orig_params:1794",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "_gather_all_orig_param_state:1687",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "_allgather_state_info:1334",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "all_gather_object:2451",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "all_gather:2859",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "B",
"ts": 1711500084306089,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 122,
"state": "completed",
"input_sizes": [
[
1721
]
],
"output_sizes": [
[
8,
1721
]
]
}
},
{
"name": "all_gather:2859",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "all_gather_object:2451",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:75",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "_allgather_state_info:1334",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "_gather_all_orig_param_state:1687",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "_convert_state_with_orig_params:1794",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict:1971",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "nccl:all_gather",
"cat": "completed",
"ph": "E",
"ts": 1711500084306189,
"pid": 0,
"tid": 4
},
{
"name": "nccl:_all_gather_base",
"cat": "completed",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "<module>:389",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "wrapper:347",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "main:368",
"cat": "/home/gchauhan/meta/torchtrain/train.py",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "save:114",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "inner_func:427",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "save:146",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_stateful_to_state_dict:236",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "state_dict:45",
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "get_optimizer_state_dict:697",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_get_optim_state_dict:466",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "optim_state_dict:1847",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_optim_state_dict_impl:1270",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "decorate_context:115",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_optim_state_dict:1971",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
"input_sizes": [
[
106560
]
],
"output_sizes": [
[
852480
]
]
}
},
{
"name": "_convert_state_with_orig_params:1794",
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py",
"ph": "B",
"ts": 1711500084308939,
"pid": 0,
"tid": 4,
"args": {
"seq_id": 123,
"state": "completed",
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment