# Timing utility: CUDA-aware accelerator/device selection (gist preview; truncated).
from time import perf_counter
from typing import Optional, Union

import torch

if torch.cuda.is_available():
    from torch import cuda as accel

    device = "cuda"

# Multiprocessing-based launcher: imports and device check (the preview ends inside the `if`).
import argparse
import multiprocessing as mp
import os
import socket
from concurrent.futures import ProcessPoolExecutor

import torch
import torch.distributed as dist

if torch.cuda.is_available():
""" | |
Minimal distributed profiling. Profiles compute and collective communications by default. Pass the | |
`--no-comms` flag to avoid collectives. Run as in | |
torchrun --nnodes=1 --nproc-per-node=2 profile_maybe_with_comms.py [--no-comms] | |
""" | |
import argparse | |
import os | |
from pathlib import Path |
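
The gist preview stops at the imports. Purely as a hedged sketch of what a body matching this docstring could look like (the tensor sizes, iteration count, and profiler settings below are assumptions, not the original code):

# Sketch of a possible body (not the original gist): matmuls, optionally with a per-step all_reduce.
import argparse
import os

import torch
import torch.distributed as dist


def main(no_comms: bool) -> None:
    dist.init_process_group(backend="nccl" if torch.cuda.is_available() else "gloo")
    local_rank = int(os.environ.get("LOCAL_RANK", 0))
    device = torch.device("cuda", local_rank) if torch.cuda.is_available() else torch.device("cpu")
    x = torch.randn(4096, 4096, device=device)
    with torch.profiler.profile() as prof:
        for _ in range(10):
            y = x @ x
            if not no_comms:
                dist.all_reduce(y)  # collective per step unless --no-comms was passed
    if dist.get_rank() == 0:
        print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=20))
    dist.destroy_process_group()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--no-comms", action="store_true")
    main(parser.parse_args().no_comms)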
""" | |
Minimal profiling script for profiling compute/comms overlap. | |
torchrun --nnodes=1 --nproc-per-node=2 profile_comms_compute_overlap.py [--no-comms] | |
""" | |
import argparse | |
import os | |
from pathlib import Path |
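
Again, only the header of this gist is visible. A sketch of the kind of loop it might profile, under the assumption that overlap is exercised by issuing the collective with async_op=True while independent compute runs:

# Sketch (not the original body): overlap an asynchronous all_reduce with an independent matmul.
import os

import torch
import torch.distributed as dist

dist.init_process_group(backend="nccl" if torch.cuda.is_available() else "gloo")
local_rank = int(os.environ.get("LOCAL_RANK", 0))
device = torch.device("cuda", local_rank) if torch.cuda.is_available() else torch.device("cpu")

grad = torch.randn(2048, 2048, device=device)
x = torch.randn(2048, 2048, device=device)

with torch.profiler.profile() as prof:
    for _ in range(5):
        work = dist.all_reduce(grad, async_op=True)  # enqueue the collective...
        y = x @ x                                    # ...and run compute while it is in flight
        work.wait()                                  # block only when the result is needed
if dist.get_rank() == 0:
    print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=15))
dist.destroy_process_group()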

#!/bin/bash -l
# Minimal torchrun-based launch script.
# See https://docs.alcf.anl.gov/aurora/data-science/frameworks/pytorch for more recommendations.
#
# Usage:
#
#   qsub -v SCRIPT_PATH=your_script_path[,ARGS=...][,NPROC_PER_NODE=...] launch_torch.sh
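
The launch script body is not shown past its header. On the Python side, a script started this way inherits the rendezvous information torchrun exports as environment variables; a minimal sketch of reading them (the gloo backend here is an arbitrary choice for illustration):

# What the launched script sees: torchrun sets RANK, WORLD_SIZE, LOCAL_RANK, MASTER_ADDR, MASTER_PORT.
import os

import torch.distributed as dist

rank = int(os.environ["RANK"])
world_size = int(os.environ["WORLD_SIZE"])
local_rank = int(os.environ["LOCAL_RANK"])

dist.init_process_group(backend="gloo")  # env:// rendezvous reads MASTER_ADDR / MASTER_PORT
print(f"rank {rank}/{world_size}, local rank {local_rank}, master {os.environ['MASTER_ADDR']}")
dist.destroy_process_group()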
""" | |
Raises a ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY after the 29th iteration on an Intel 1550 max. | |
""" | |
import argparse | |
import os | |
import torch | |
import intel_extension_for_pytorch as ipex # noqa |
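
The loop that hits the error is not in the preview. Purely as an illustration of how device memory can run out after a fixed number of iterations (the actual cause in the gist may differ), holding a reference to every per-iteration allocation is one such pattern; the tensor size below is an arbitrary choice:

# Hypothetical out-of-memory pattern (not the gist's actual loop): each iteration allocates a large
# XPU tensor and keeps it reachable, so free device memory shrinks until allocation fails.
import torch
import intel_extension_for_pytorch as ipex  # noqa: F401  (registers the torch.xpu backend)

device = "xpu" if torch.xpu.is_available() else "cpu"
kept = []
for i in range(100):
    kept.append(torch.empty(1024, 1024, 1024, device=device))  # ~4 GiB of float32 per iteration
    print(f"iteration {i}: roughly {len(kept) * 4} GiB held on {device}")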

# Device-agnostic distributed setup: accelerator module, device type, and backend selection.
from abc import ABC, abstractmethod
from typing import Type

import torch
import torch.distributed as dist

if torch.cuda.is_available():
    accel = torch.cuda
    DEVICE_TYPE = "cuda"
    BACKEND = "nccl"
""" | |
Basic FSDP/DDP applied to a linear model. | |
""" | |
import argparse | |
import os | |
import torch | |
import torch.distributed as dist | |
import torch.nn as nn |
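
The preview ends at the imports. A sketch of how the linear model might be wrapped, with DDP as the default and FSDP as the alternative; the dimensions and the single forward/backward step are assumptions:

# Sketch (not the original body): wrap nn.Linear in DDP or FSDP and run one forward/backward pass.
import os

import torch
import torch.distributed as dist
import torch.nn as nn
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
from torch.nn.parallel import DistributedDataParallel as DDP

dist.init_process_group(backend="nccl" if torch.cuda.is_available() else "gloo")
local_rank = int(os.environ.get("LOCAL_RANK", 0))
device = torch.device("cuda", local_rank) if torch.cuda.is_available() else torch.device("cpu")
if device.type == "cuda":
    torch.cuda.set_device(device)

use_fsdp = False  # flip to True for FSDP; FSDP is typically run with CUDA + NCCL
model = nn.Linear(1024, 1024).to(device)
model = FSDP(model) if use_fsdp else DDP(model)

out = model(torch.randn(8, 1024, device=device))
out.square().mean().backward()
dist.destroy_process_group()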

# Same device-agnostic setup pattern as above; the preview ends at the non-CUDA fallback branch.
from abc import ABC, abstractmethod

import torch
import torch.distributed as dist

if torch.cuda.is_available():
    accel = torch.cuda
    DEVICE_TYPE = "cuda"
    BACKEND = "nccl"
else:
""" | |
Launch single-node reduce scatter with multiprocessing. | |
python3 mp_torch_reduce_scatter.py | |
""" | |
import os | |
import socket | |
from concurrent.futures import ProcessPoolExecutor |
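
The worker function is not part of the preview. A sketch of the overall pattern, assuming two CUDA devices and the NCCL backend (the gist's actual backend and tensor shapes are not visible): each rank contributes one chunk per rank and receives the element-wise sum of every rank's chunk for its slot.

# Sketch (not the original body): drive a reduce_scatter from a process pool, one process per rank.
import multiprocessing as mp
import os
import socket
from concurrent.futures import ProcessPoolExecutor

import torch
import torch.distributed as dist

WORLD_SIZE = 2  # assumes two CUDA devices on the node


def worker(rank: int, port: int) -> list:
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = str(port)
    dist.init_process_group("nccl", rank=rank, world_size=WORLD_SIZE)
    torch.cuda.set_device(rank)
    # Rank r contributes WORLD_SIZE chunks filled with r; it gets back the sum of every rank's r-th chunk.
    inputs = [torch.full((4,), float(rank), device="cuda") for _ in range(WORLD_SIZE)]
    output = torch.empty(4, device="cuda")
    dist.reduce_scatter(output, inputs)
    dist.destroy_process_group()
    return output.tolist()


if __name__ == "__main__":
    with socket.socket() as s:  # grab a free TCP port for the rendezvous
        s.bind(("", 0))
        port = s.getsockname()[1]
    ctx = mp.get_context("spawn")
    with ProcessPoolExecutor(max_workers=WORLD_SIZE, mp_context=ctx) as pool:
        print(list(pool.map(worker, range(WORLD_SIZE), [port] * WORLD_SIZE)))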