-
-
Save chauhang/5e24b532d3cbb0dd3dbc7e2401a17cd6 to your computer and use it in GitHub Desktop.
torchtrain 1b model checkpoint failure flight recorder dump
This file has been truncated, but you can view the full file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"traceEvents": [ | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500078064502, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 1, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500078064502, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 1, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500078064502, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 1, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500078064502, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 1, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500078064502, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 1, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500078064502, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 1, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500078064502, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 1, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500078064502, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 1, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500078064502, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 1, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500078064502, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 1, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500078064502, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 1, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500078064502, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 1, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500078064502, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 1, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500078064502, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 1, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500078064602, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500078064602, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500078064602, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500078064602, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500078064602, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500078064602, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500078064602, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500078064602, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500078064602, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500078064602, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500078064602, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500078064602, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500078064602, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500078064602, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500078203477, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 2, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500078203577, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082256013, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 3, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082256113, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082294375, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 4, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082294375, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 4, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082294375, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 4, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082294375, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 4, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082294375, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 4, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082294375, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 4, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082294375, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 4, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082294375, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 4, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082294475, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082294475, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082294475, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082294475, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082294475, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082294475, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082294475, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082294475, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082389172, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 5, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082389172, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 5, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082389172, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 5, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082389172, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 5, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082389172, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 5, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082389172, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 5, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082389172, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 5, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082389172, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 5, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082389272, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082389272, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082389272, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082389272, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082389272, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082389272, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082389272, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082389272, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082489295, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 6, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500082489295, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 6, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082489295, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 6, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082489295, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 6, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082489295, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 6, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082489295, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 6, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082489395, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082489395, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082489395, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082489395, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500082489395, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082489395, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082493851, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 7, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500082493851, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 7, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082493851, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 7, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082493851, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 7, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082493851, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 7, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082493851, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 7, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082493951, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082493951, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082493951, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082493951, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500082493951, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082493951, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082640009, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 8, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500082640009, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 8, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082640009, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 8, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082640009, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 8, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082640009, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 8, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082640009, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 8, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082640109, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082640109, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082640109, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082640109, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500082640109, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082640109, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082664881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 9, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082664881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 9, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500082664881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 9, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082664881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 9, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500082664881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 9, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500082664881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 9, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082664881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 9, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082664881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 9, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082664981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082664981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500082664981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500082664981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082664981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500082664981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082664981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082664981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082733976, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 10, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082733976, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 10, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500082733976, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 10, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082733976, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 10, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500082733976, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 10, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500082733976, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 10, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500082733976, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 10, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500082734076, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500082734076, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500082734076, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082734076, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500082734076, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082734076, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082734076, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082737123, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 11, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082737123, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 11, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500082737123, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 11, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082737123, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 11, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500082737123, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 11, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500082737123, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 11, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500082737123, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 11, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500082737223, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500082737223, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500082737223, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082737223, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500082737223, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082737223, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082737223, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082738706, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 12, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082738706, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 12, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500082738706, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 12, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:378", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082738706, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 12, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "set_pg_timeouts:50", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500082738706, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 12, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082738706, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 12, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "barrier:3686", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082738706, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 12, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "barrier:3686", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082738806, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082738806, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "set_pg_timeouts:50", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500082738806, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:378", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082738806, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500082738806, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082738806, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082738806, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082778689, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 13, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082778689, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 13, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500082778689, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 13, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082778689, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 13, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082778689, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 13, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082778689, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 13, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500082778689, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 13, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082778689, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 13, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082778689, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 13, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082778689, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 13, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082778689, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 13, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082778689, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 13, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082778689, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 13, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082778689, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 13, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082778789, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082778789, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082778789, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082778789, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082778789, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082778789, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082778789, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500082778789, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082778789, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082778789, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082778789, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500082778789, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082778789, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082778789, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082779699, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 14, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082779799, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082782360, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 15, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082782460, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082799888, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 16, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082799888, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 16, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082799888, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 16, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082799888, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 16, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082799888, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 16, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082799888, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 16, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082799888, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 16, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082799888, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 16, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082799988, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082799988, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082799988, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082799988, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082799988, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082799988, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082799988, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082799988, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082801573, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 17, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082801573, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 17, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082801573, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 17, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082801573, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 17, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082801573, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 17, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082801573, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 17, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082801573, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 17, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082801573, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 17, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082801673, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082801673, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082801673, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082801673, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082801673, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082801673, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082801673, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082801673, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082809160, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 18, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500082809160, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 18, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082809160, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 18, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082809160, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 18, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082809160, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 18, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082809160, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 18, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082809260, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082809260, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082809260, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082809260, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500082809260, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082809260, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082814429, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 19, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500082814429, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 19, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082814429, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 19, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082814429, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 19, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082814429, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 19, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082814429, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 19, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082814529, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082814529, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082814529, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082814529, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500082814529, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082814529, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082890579, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 20, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500082890579, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 20, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082890579, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 20, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082890579, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 20, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082890579, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 20, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082890579, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 20, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082890679, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082890679, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082890679, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082890679, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500082890679, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082890679, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082891745, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 21, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082891745, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 21, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500082891745, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 21, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082891745, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 21, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500082891745, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 21, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500082891745, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 21, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082891745, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 21, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082891745, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 21, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082891845, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082891845, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500082891845, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500082891845, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082891845, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500082891845, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082891845, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082891845, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082892957, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 22, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082892957, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 22, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500082892957, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 22, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082892957, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 22, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500082892957, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 22, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500082892957, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 22, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500082892957, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 22, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500082893057, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500082893057, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500082893057, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082893057, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500082893057, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082893057, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082893057, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082893451, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 23, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082893451, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 23, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500082893451, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 23, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082893451, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 23, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500082893451, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 23, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500082893451, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 23, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500082893451, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 23, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500082893551, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500082893551, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500082893551, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082893551, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500082893551, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082893551, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082893551, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082935568, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 24, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082935568, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 24, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500082935568, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 24, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082935568, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 24, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082935568, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 24, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082935568, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 24, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500082935568, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 24, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082935568, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 24, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082935568, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 24, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082935568, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 24, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082935568, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 24, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082935568, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 24, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082935568, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 24, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082935568, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 24, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082935668, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082935668, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082935668, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082935668, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082935668, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082935668, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082935668, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500082935668, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082935668, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082935668, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082935668, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500082935668, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082935668, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082935668, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082936595, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 25, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082936695, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082938982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 26, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082939082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082953189, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 27, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082953189, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 27, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082953189, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 27, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082953189, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 27, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082953189, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 27, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082953189, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 27, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082953189, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 27, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082953189, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 27, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082953289, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082953289, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082953289, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082953289, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082953289, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082953289, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082953289, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082953289, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082954684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 28, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082954684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 28, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082954684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 28, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082954684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 28, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082954684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 28, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500082954684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 28, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082954684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 28, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082954684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 28, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082954784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082954784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082954784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500082954784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082954784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082954784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082954784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082954784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082961821, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 29, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500082961821, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 29, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082961821, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 29, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082961821, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 29, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082961821, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 29, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082961821, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 29, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082961921, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082961921, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082961921, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082961921, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500082961921, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082961921, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500082967042, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 30, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500082967042, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 30, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082967042, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 30, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500082967042, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 30, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500082967042, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 30, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500082967042, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 30, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500082967142, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500082967142, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082967142, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500082967142, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500082967142, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500082967142, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083044539, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 31, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083044539, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 31, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083044539, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 31, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083044539, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 31, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083044539, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 31, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083044539, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 31, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083044639, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083044639, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083044639, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083044639, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083044639, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083044639, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083046379, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 32, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083046379, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 32, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083046379, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 32, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083046379, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 32, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083046379, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 32, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083046379, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 32, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083046379, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 32, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083046379, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 32, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083046479, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083046479, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083046479, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083046479, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083046479, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083046479, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083046479, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083046479, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083047546, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 33, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083047546, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 33, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083047546, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 33, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083047546, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 33, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500083047546, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 33, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500083047546, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 33, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500083047546, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 33, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500083047646, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500083047646, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500083047646, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083047646, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083047646, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083047646, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083047646, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083047942, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 34, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083047942, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 34, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083047942, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 34, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083047942, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 34, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500083047942, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 34, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500083047942, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 34, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500083047942, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 34, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500083048042, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500083048042, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500083048042, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083048042, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083048042, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083048042, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083048042, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083085658, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 35, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083085658, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 35, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083085658, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 35, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083085658, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 35, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083085658, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 35, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083085658, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 35, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083085658, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 35, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083085658, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 35, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083085658, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 35, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083085658, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 35, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083085658, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 35, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083085658, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 35, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083085658, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 35, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083085658, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 35, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083085758, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083085758, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083085758, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083085758, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083085758, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083085758, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083085758, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083085758, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083085758, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083085758, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083085758, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083085758, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083085758, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083085758, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083086582, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 36, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083086682, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083089075, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 37, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083089175, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083106227, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 38, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083106227, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 38, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083106227, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 38, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083106227, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 38, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083106227, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 38, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083106227, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 38, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083106227, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 38, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083106227, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 38, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083106327, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083106327, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083106327, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083106327, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083106327, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083106327, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083106327, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083106327, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083107735, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 39, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083107735, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 39, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083107735, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 39, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083107735, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 39, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083107735, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 39, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083107735, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 39, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083107735, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 39, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083107735, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 39, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083107835, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083107835, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083107835, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083107835, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083107835, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083107835, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083107835, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083107835, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083114977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 40, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083114977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 40, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083114977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 40, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083114977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 40, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083114977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 40, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083114977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 40, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083115077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083115077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083115077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083115077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083115077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083115077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083120272, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 41, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083120272, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 41, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083120272, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 41, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083120272, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 41, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083120272, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 41, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083120272, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 41, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083120372, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083120372, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083120372, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083120372, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083120372, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083120372, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083198088, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 42, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083198088, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 42, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083198088, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 42, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083198088, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 42, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083198088, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 42, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083198088, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 42, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083198188, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083198188, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083198188, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083198188, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083198188, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083198188, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083199492, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 43, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083199492, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 43, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083199492, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 43, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083199492, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 43, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083199492, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 43, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083199492, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 43, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083199492, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 43, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083199492, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 43, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083199592, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083199592, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083199592, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083199592, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083199592, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083199592, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083199592, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083199592, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083200482, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 44, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083200482, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 44, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083200482, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 44, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083200482, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 44, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500083200482, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 44, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500083200482, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 44, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500083200482, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 44, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500083200582, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500083200582, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500083200582, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083200582, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083200582, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083200582, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083200582, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083200787, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 45, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083200787, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 45, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083200787, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 45, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083200787, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 45, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500083200787, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 45, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500083200787, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 45, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500083200787, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 45, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500083200887, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500083200887, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500083200887, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083200887, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083200887, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083200887, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083200887, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083239020, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 46, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083239020, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 46, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083239020, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 46, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083239020, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 46, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083239020, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 46, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083239020, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 46, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083239020, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 46, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083239020, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 46, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083239020, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 46, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083239020, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 46, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083239020, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 46, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083239020, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 46, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083239020, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 46, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083239020, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 46, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083239120, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083239120, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083239120, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083239120, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083239120, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083239120, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083239120, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083239120, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083239120, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083239120, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083239120, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083239120, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083239120, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083239120, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083239881, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 47, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083239981, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083242249, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 48, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083242349, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083257684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 49, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083257684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 49, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083257684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 49, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083257684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 49, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083257684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 49, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083257684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 49, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083257684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 49, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083257684, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 49, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083257784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083257784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083257784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083257784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083257784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083257784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083257784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083257784, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083259603, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 50, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083259603, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 50, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083259603, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 50, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083259603, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 50, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083259603, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 50, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083259603, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 50, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083259603, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 50, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083259603, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 50, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083259703, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083259703, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083259703, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083259703, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083259703, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083259703, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083259703, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083259703, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083267425, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 51, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083267425, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 51, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083267425, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 51, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083267425, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 51, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083267425, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 51, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083267425, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 51, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083267525, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083267525, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083267525, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083267525, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083267525, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083267525, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083272843, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 52, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083272843, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 52, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083272843, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 52, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083272843, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 52, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083272843, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 52, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083272843, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 52, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083272943, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083272943, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083272943, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083272943, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083272943, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083272943, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083349531, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 53, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083349531, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 53, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083349531, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 53, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083349531, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 53, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083349531, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 53, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083349531, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 53, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083349631, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083349631, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083349631, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083349631, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083349631, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083349631, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083350982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 54, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083350982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 54, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083350982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 54, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083350982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 54, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083350982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 54, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083350982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 54, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083350982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 54, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083350982, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 54, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083351082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083351082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083351082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083351082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083351082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083351082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083351082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083351082, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083352055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 55, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083352055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 55, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083352055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 55, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083352055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 55, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500083352055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 55, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500083352055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 55, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500083352055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 55, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500083352155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500083352155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500083352155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083352155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083352155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083352155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083352155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083352641, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 56, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083352641, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 56, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083352641, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 56, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083352641, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 56, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500083352641, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 56, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500083352641, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 56, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500083352641, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 56, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500083352741, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500083352741, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500083352741, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083352741, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083352741, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083352741, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083352741, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083391673, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 57, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083391673, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 57, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083391673, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 57, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083391673, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 57, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083391673, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 57, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083391673, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 57, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083391673, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 57, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083391673, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 57, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083391673, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 57, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083391673, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 57, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083391673, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 57, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083391673, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 57, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083391673, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 57, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083391673, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 57, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083391773, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083391773, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083391773, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083391773, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083391773, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083391773, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083391773, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083391773, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083391773, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083391773, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083391773, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083391773, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083391773, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083391773, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083392892, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 58, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083392992, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083395644, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 59, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083395744, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083412871, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 60, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083412871, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 60, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083412871, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 60, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083412871, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 60, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083412871, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 60, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083412871, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 60, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083412871, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 60, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083412871, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 60, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083412971, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083412971, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083412971, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083412971, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083412971, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083412971, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083412971, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083412971, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083414493, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 61, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083414493, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 61, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083414493, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 61, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083414493, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 61, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083414493, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 61, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083414493, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 61, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083414493, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 61, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083414493, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 61, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083414593, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083414593, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083414593, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083414593, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083414593, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083414593, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083414593, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083414593, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083422083, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 62, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083422083, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 62, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083422083, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 62, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083422083, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 62, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083422083, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 62, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083422083, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 62, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083422183, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083422183, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083422183, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083422183, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083422183, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083422183, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083427330, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 63, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083427330, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 63, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083427330, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 63, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083427330, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 63, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083427330, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 63, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083427330, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 63, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083427430, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083427430, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083427430, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083427430, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083427430, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083427430, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083503789, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 64, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083503789, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 64, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083503789, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 64, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083503789, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 64, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083503789, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 64, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083503789, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 64, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083503889, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083503889, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083503889, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083503889, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083503889, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083503889, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083504885, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 65, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083504885, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 65, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083504885, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 65, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083504885, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 65, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083504885, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 65, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083504885, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 65, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083504885, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 65, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083504885, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 65, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083504985, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083504985, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083504985, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083504985, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083504985, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083504985, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083504985, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083504985, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083506006, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 66, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083506006, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 66, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083506006, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 66, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083506006, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 66, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500083506006, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 66, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500083506006, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 66, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500083506006, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 66, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500083506106, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500083506106, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500083506106, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083506106, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083506106, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083506106, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083506106, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083506851, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 67, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083506851, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 67, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083506851, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 67, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083506851, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 67, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500083506851, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 67, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500083506851, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 67, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500083506851, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 67, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500083506951, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500083506951, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500083506951, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083506951, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083506951, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083506951, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083506951, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083547876, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 68, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083547876, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 68, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083547876, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 68, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083547876, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 68, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083547876, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 68, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083547876, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 68, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083547876, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 68, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083547876, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 68, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083547876, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 68, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083547876, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 68, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083547876, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 68, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083547876, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 68, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083547876, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 68, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083547876, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 68, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083547976, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083547976, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083547976, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083547976, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083547976, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083547976, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083547976, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083547976, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083547976, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083547976, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083547976, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083547976, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083547976, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083547976, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083549195, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 69, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083549295, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083551778, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 70, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083551878, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083568055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 71, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083568055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 71, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083568055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 71, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083568055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 71, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083568055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 71, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083568055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 71, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083568055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 71, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083568055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 71, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083568155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083568155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083568155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083568155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083568155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083568155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083568155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083568155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083569968, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 72, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083569968, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 72, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083569968, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 72, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083569968, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 72, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083569968, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 72, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083569968, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 72, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083569968, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 72, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083569968, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 72, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083570068, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083570068, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083570068, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083570068, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083570068, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083570068, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083570068, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083570068, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083577735, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 73, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083577735, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 73, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083577735, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 73, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083577735, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 73, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083577735, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 73, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083577735, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 73, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083577835, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083577835, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083577835, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083577835, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083577835, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083577835, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083583169, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 74, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083583169, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 74, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083583169, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 74, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083583169, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 74, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083583169, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 74, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083583169, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 74, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083583269, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083583269, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083583269, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083583269, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083583269, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083583269, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083660055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 75, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083660055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 75, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083660055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 75, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083660055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 75, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083660055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 75, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083660055, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 75, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083660155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083660155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083660155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083660155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083660155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083660155, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083662077, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 76, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083662077, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 76, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083662077, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 76, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083662077, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 76, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083662077, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 76, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083662077, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 76, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083662077, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 76, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083662077, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 76, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083662177, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083662177, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083662177, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083662177, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083662177, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083662177, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083662177, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083662177, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083663697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 77, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083663697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 77, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083663697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 77, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083663697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 77, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500083663697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 77, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500083663697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 77, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500083663697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 77, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500083663797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500083663797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500083663797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083663797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083663797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083663797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083663797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083664148, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 78, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083664148, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 78, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083664148, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 78, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083664148, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 78, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500083664148, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 78, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500083664148, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 78, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500083664148, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 78, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500083664248, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500083664248, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500083664248, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083664248, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083664248, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083664248, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083664248, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083705557, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 79, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083705557, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 79, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083705557, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 79, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083705557, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 79, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083705557, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 79, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083705557, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 79, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083705557, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 79, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083705557, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 79, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083705557, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 79, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083705557, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 79, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083705557, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 79, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083705557, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 79, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083705557, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 79, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083705557, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 79, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083705657, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083705657, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083705657, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083705657, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083705657, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083705657, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083705657, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083705657, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083705657, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083705657, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083705657, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083705657, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083705657, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083705657, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083706528, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 80, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083706628, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083709190, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 81, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083709290, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083724935, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 82, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083724935, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 82, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083724935, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 82, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083724935, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 82, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083724935, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 82, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083724935, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 82, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083724935, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 82, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083724935, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 82, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083725035, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083725035, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083725035, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083725035, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083725035, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083725035, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083725035, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083725035, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083726590, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 83, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083726590, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 83, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083726590, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 83, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083726590, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 83, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083726590, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 83, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083726590, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 83, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083726590, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 83, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083726590, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 83, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083726690, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083726690, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083726690, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083726690, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083726690, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083726690, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083726690, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083726690, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083734146, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 84, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083734146, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 84, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083734146, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 84, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083734146, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 84, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083734146, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 84, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083734146, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 84, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083734246, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083734246, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083734246, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083734246, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083734246, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083734246, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083739418, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 85, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083739418, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 85, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083739418, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 85, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083739418, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 85, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083739418, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 85, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083739418, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 85, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083739518, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083739518, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083739518, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083739518, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083739518, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083739518, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083815755, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 86, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083815755, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 86, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083815755, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 86, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083815755, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 86, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083815755, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 86, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083815755, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 86, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083815855, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083815855, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083815855, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083815855, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083815855, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083815855, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083816994, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 87, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083816994, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 87, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083816994, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 87, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083816994, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 87, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083816994, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 87, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083816994, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 87, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083816994, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 87, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083816994, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 87, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083817094, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083817094, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083817094, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083817094, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083817094, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083817094, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083817094, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083817094, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083818575, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 88, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083818575, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 88, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083818575, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 88, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083818575, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 88, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500083818575, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 88, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500083818575, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 88, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500083818575, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 88, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500083818675, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500083818675, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500083818675, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083818675, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083818675, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083818675, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083818675, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083819137, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 89, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083819137, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 89, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083819137, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 89, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083819137, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 89, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500083819137, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 89, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500083819137, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 89, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500083819137, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 89, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500083819237, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500083819237, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500083819237, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083819237, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083819237, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083819237, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083819237, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083858463, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 90, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083858463, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 90, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083858463, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 90, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083858463, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 90, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083858463, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 90, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083858463, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 90, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083858463, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 90, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083858463, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 90, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083858463, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 90, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083858463, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 90, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083858463, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 90, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083858463, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 90, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083858463, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 90, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083858463, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 90, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083858563, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083858563, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083858563, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083858563, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083858563, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083858563, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083858563, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083858563, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083858563, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083858563, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083858563, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083858563, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083858563, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083858563, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083859326, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 91, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083859426, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083861886, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 92, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083861986, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083896166, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 93, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083896166, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 93, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083896166, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 93, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083896166, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 93, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083896166, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 93, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083896166, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 93, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083896166, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 93, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083896166, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 93, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083896266, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083896266, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083896266, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083896266, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083896266, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083896266, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083896266, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083896266, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083897883, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 94, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083897883, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 94, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083897883, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 94, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083897883, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 94, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083897883, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 94, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500083897883, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 94, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083897883, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 94, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083897883, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 94, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083897983, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083897983, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083897983, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500083897983, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083897983, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083897983, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083897983, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083897983, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083905743, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 95, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083905743, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 95, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083905743, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 95, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083905743, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 95, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083905743, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 95, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083905743, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 95, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083905843, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083905843, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083905843, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083905843, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083905843, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083905843, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083911470, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 96, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083911470, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 96, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083911470, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 96, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083911470, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 96, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083911470, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 96, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083911470, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 96, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083911570, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083911570, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083911570, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083911570, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083911570, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083911570, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083988163, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 97, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083988163, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 97, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083988163, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 97, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500083988163, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 97, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083988163, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 97, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083988163, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 97, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083988263, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083988263, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083988263, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500083988263, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083988263, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083988263, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083989782, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 98, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083989782, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 98, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083989782, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 98, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083989782, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 98, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500083989782, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 98, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500083989782, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 98, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500083989782, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 98, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500083989782, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 98, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500083989882, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500083989882, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500083989882, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500083989882, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083989882, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083989882, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083989882, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083989882, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083991199, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 99, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083991199, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 99, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083991199, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 99, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083991199, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 99, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500083991199, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 99, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500083991199, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 99, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500083991199, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 99, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500083991299, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500083991299, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500083991299, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083991299, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083991299, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083991299, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083991299, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500083991641, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 100, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083991641, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 100, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500083991641, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 100, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500083991641, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 100, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500083991641, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 100, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500083991641, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 100, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500083991641, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 100, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500083991741, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500083991741, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500083991741, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083991741, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500083991741, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500083991741, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500083991741, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084033448, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 101, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084033448, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 101, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084033448, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 101, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084033448, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 101, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084033448, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 101, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084033448, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 101, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084033448, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 101, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084033448, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 101, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084033448, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 101, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084033448, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 101, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084033448, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 101, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084033448, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 101, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084033448, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 101, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084033448, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 101, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084033548, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084033548, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084033548, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084033548, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084033548, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084033548, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084033548, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084033548, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084033548, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084033548, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084033548, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084033548, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084033548, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084033548, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084034638, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 102, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084034738, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084037687, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 103, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward_unshard:415", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_forward:380", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:843", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:509", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/models/llama/model.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "forward:857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_call_impl:1536", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_wrapped_call_impl:1527", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:265", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084037787, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084052081, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 104, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084052081, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 104, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084052081, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 104, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084052081, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 104, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084052081, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 104, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084052081, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 104, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084052081, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 104, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084052081, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 104, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084052181, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084052181, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084052181, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084052181, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084052181, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084052181, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084052181, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084052181, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084054321, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 105, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084054321, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 105, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084054321, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 105, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084054321, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 105, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084054321, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 105, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084054321, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 105, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084054321, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 105, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084054321, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 105, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084054421, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084054421, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084054421, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084054421, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084054421, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_prefetch_handle:1216", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084054421, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_backward_hook:676", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084054421, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084054421, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084063727, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 106, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084063727, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 106, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084063727, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 106, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084063727, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 106, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084063727, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 106, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084063727, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 106, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084063827, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084063827, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084063827, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084063827, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500084063827, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084063827, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084069651, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 107, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084069651, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 107, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084069651, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 107, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084069651, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 107, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084069651, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 107, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084069651, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 107, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
852480 | |
] | |
], | |
"output_sizes": [ | |
[ | |
106560 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084069751, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084069751, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084069751, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084069751, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500084069751, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084069751, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084144410, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 108, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084144410, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 108, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084144410, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 108, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084144410, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 108, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084144410, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 108, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084144410, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 108, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
16384256 | |
] | |
], | |
"output_sizes": [ | |
[ | |
2048032 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "reduce_scatter_tensor:3362", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084144510, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084144510, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_reduce_grad:841", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084144510, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_post_backward_hook:749", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084144510, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500084144510, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_reduce_scatter_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084144510, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084145965, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 109, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084145965, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 109, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084145965, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 109, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084145965, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 109, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084145965, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 109, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084145965, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 109, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084145965, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 109, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084145965, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 109, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:2222", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084146065, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084146065, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "clip_grad_norm_:1169", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084146065, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500084146065, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:276", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084146065, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084146065, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084146065, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084146065, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084147289, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 110, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084147289, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 110, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084147289, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 110, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084147289, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 110, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500084147289, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 110, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500084147289, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 110, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500084147289, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 110, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500084147389, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500084147389, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_mean:23", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500084147389, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:303", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084147389, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084147389, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084147389, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084147389, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084147718, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 111, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084147718, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 111, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084147718, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 111, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084147718, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 111, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "B", | |
"ts": 1711500084147718, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 111, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "B", | |
"ts": 1711500084147718, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 111, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "B", | |
"ts": 1711500084147718, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 111, | |
"state": "completed", | |
"input_sizes": [ | |
[] | |
], | |
"output_sizes": [ | |
[] | |
] | |
} | |
}, | |
{ | |
"name": "__call__:860", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/_ops.py", | |
"ph": "E", | |
"ts": 1711500084147818, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_reduce:188", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/_functional_collectives.py", | |
"ph": "E", | |
"ts": 1711500084147818, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "dist_max:18", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/utils.py", | |
"ph": "E", | |
"ts": 1711500084147818, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:304", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084147818, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084147818, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084147818, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_reduce", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084147818, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:33", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "get_model_state_dict:652", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_get_model_state_dict:333", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:1907", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_state_dict_hook:786", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_sharded_pre_state_dict_hook:531", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_common_unshard_pre_state_dict_hook:174", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_enter_unshard_params_ctx:138", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "__enter__:137", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/contextlib.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard_fsdp_state_params:196", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_unshard_param_utils.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084150697, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 112, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard_fsdp_state_params:196", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_unshard_param_utils.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "__enter__:137", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/contextlib.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_enter_unshard_params_ctx:138", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_common_unshard_pre_state_dict_hook:174", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_sharded_pre_state_dict_hook:531", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_state_dict_hook:786", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:1907", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_get_model_state_dict:333", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "get_model_state_dict:652", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:33", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084150797, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:33", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "get_model_state_dict:652", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_get_model_state_dict:333", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:1911", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:1911", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:1911", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:1907", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_state_dict_hook:786", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_sharded_pre_state_dict_hook:531", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_common_unshard_pre_state_dict_hook:174", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_enter_unshard_params_ctx:138", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "__enter__:137", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/contextlib.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard_fsdp_state_params:196", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_unshard_param_utils.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084151602, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 113, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard_fsdp_state_params:196", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_unshard_param_utils.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "__enter__:137", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/contextlib.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_enter_unshard_params_ctx:138", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_common_unshard_pre_state_dict_hook:174", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_sharded_pre_state_dict_hook:531", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_state_dict_hook:786", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:1907", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:1911", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:1911", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:1911", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_get_model_state_dict:333", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "get_model_state_dict:652", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:33", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084151702, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:33", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "get_model_state_dict:652", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_get_model_state_dict:333", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:1911", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:1911", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:1911", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:1907", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_pre_state_dict_hook:786", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_sharded_pre_state_dict_hook:531", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_common_unshard_pre_state_dict_hook:174", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_enter_unshard_params_ctx:138", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "__enter__:137", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/contextlib.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard_fsdp_state_params:196", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_unshard_param_utils.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084155202, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 114, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_all_gather_flat_param:1399", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "unshard:1308", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_flat_param.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard:299", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_runtime_utils.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_unshard_fsdp_state_params:196", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_unshard_param_utils.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "__enter__:137", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/contextlib.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_enter_unshard_params_ctx:138", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_common_unshard_pre_state_dict_hook:174", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_sharded_pre_state_dict_hook:531", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_pre_state_dict_hook:786", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_state_dict_utils.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:1907", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:1911", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:1911", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:1911", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/nn/modules/module.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_get_model_state_dict:333", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "get_model_state_dict:652", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:33", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084155302, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_gather", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict:1956", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_map_param_key_to_optim_keys:1232", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_object:2439", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather:2857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084246852, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 115, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather:2857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_gather_object:2439", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_map_param_key_to_optim_keys:1232", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict:1956", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_gather", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084246952, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_gather", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict:1956", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_map_param_key_to_optim_keys:1232", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_object:2451", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather:2857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084292341, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 116, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
355 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
355 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather:2857", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_gather_object:2451", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_map_param_key_to_optim_keys:1232", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict:1956", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_gather", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084292441, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_gather", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict:1971", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_convert_state_with_orig_params:1794", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_gather_all_orig_param_state:1687", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_allgather_state_info:1334", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_object:2439", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather:2859", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084297977, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 117, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather:2859", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_gather_object:2439", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_allgather_state_info:1334", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_gather_all_orig_param_state:1687", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_convert_state_with_orig_params:1794", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict:1971", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_gather", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084298077, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_gather", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict:1971", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_convert_state_with_orig_params:1794", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_gather_all_orig_param_state:1687", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_allgather_state_info:1334", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_object:2451", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather:2859", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084298626, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 118, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1048 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1048 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather:2859", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_gather_object:2451", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_allgather_state_info:1334", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_gather_all_orig_param_state:1687", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_convert_state_with_orig_params:1794", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict:1971", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_gather", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084298726, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict:1971", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_convert_state_with_orig_params:1794", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_gather_all_orig_param_state:1688", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_allgather_orig_param_states:1634", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084301177, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 119, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_allgather_orig_param_states:1634", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_gather_all_orig_param_state:1688", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_convert_state_with_orig_params:1794", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict:1971", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084301277, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict:1971", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_convert_state_with_orig_params:1794", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_gather_all_orig_param_state:1688", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_allgather_orig_param_states:1634", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084303139, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 120, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
2048032 | |
] | |
], | |
"output_sizes": [ | |
[ | |
16384256 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_into_tensor:2951", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_allgather_orig_param_states:1634", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_gather_all_orig_param_state:1688", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_convert_state_with_orig_params:1794", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict:1971", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084303239, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_gather", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict:1971", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_convert_state_with_orig_params:1794", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_gather_all_orig_param_state:1687", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_allgather_state_info:1334", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_object:2439", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather:2859", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084305279, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 121, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather:2859", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_gather_object:2439", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_allgather_state_info:1334", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_gather_all_orig_param_state:1687", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_convert_state_with_orig_params:1794", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict:1971", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_gather", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084305379, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_gather", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict:1971", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_convert_state_with_orig_params:1794", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_gather_all_orig_param_state:1687", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_allgather_state_info:1334", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather_object:2451", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather:2859", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "B", | |
"ts": 1711500084306089, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 122, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
1721 | |
] | |
], | |
"output_sizes": [ | |
[ | |
8, | |
1721 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "all_gather:2859", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "all_gather_object:2451", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:75", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_allgather_state_info:1334", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_gather_all_orig_param_state:1687", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_convert_state_with_orig_params:1794", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict:1971", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:all_gather", | |
"cat": "completed", | |
"ph": "E", | |
"ts": 1711500084306189, | |
"pid": 0, | |
"tid": 4 | |
}, | |
{ | |
"name": "nccl:_all_gather_base", | |
"cat": "completed", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "<module>:389", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "wrapper:347", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "main:368", | |
"cat": "/home/gchauhan/meta/torchtrain/train.py", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:114", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "inner_func:427", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "save:146", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_stateful_to_state_dict:236", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict_saver.py", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "state_dict:45", | |
"cat": "/home/gchauhan/meta/torchtrain/torchtrain/checkpoint.py", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "get_optimizer_state_dict:697", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_get_optim_state_dict:466", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/checkpoint/state_dict.py", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "optim_state_dict:1847", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict_impl:1270", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "decorate_context:115", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/utils/_contextlib.py", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_optim_state_dict:1971", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", | |
"input_sizes": [ | |
[ | |
106560 | |
] | |
], | |
"output_sizes": [ | |
[ | |
852480 | |
] | |
] | |
} | |
}, | |
{ | |
"name": "_convert_state_with_orig_params:1794", | |
"cat": "/home/gchauhan/my_envs/llm-amd/lib/python3.11/site-packages/torch/distributed/fsdp/_optim_utils.py", | |
"ph": "B", | |
"ts": 1711500084308939, | |
"pid": 0, | |
"tid": 4, | |
"args": { | |
"seq_id": 123, | |
"state": "completed", |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment