Garrett Goon garrett361
garrett361 / matmul_bench.py
Last active April 23, 2024 13:09
xpu and cuda matmul timing
from time import perf_counter
from typing import Optional, Union
import torch
if torch.cuda.is_available():
    from torch import cuda as accel
    device = "cuda"
garrett361 / allgather_test_mp.py
Last active April 12, 2024 03:06
torch allgather test mp
import argparse
import multiprocessing as mp
import os
import socket
from concurrent.futures import ProcessPoolExecutor
import torch
import torch.distributed as dist
if torch.cuda.is_available():
    device = "cuda"  # assumed continuation of the truncated preview
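
A sketch of how a single-node all_gather smoke test along these lines could be launched with ProcessPoolExecutor; the gloo backend, fixed port, and tensor shape are assumptions chosen so the sketch runs on CPU-only machines:

import os
from concurrent.futures import ProcessPoolExecutor

import torch
import torch.distributed as dist

WORLD_SIZE = 2  # illustrative

def run_allgather(rank: int) -> None:
    # Env-var rendezvous on localhost; the port is an arbitrary choice.
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "29500"
    dist.init_process_group("gloo", rank=rank, world_size=WORLD_SIZE)
    t = torch.full((4,), float(rank))
    out = [torch.empty_like(t) for _ in range(WORLD_SIZE)]
    dist.all_gather(out, t)
    # Each slot should hold the sending rank's value.
    assert all(o.eq(r).all() for r, o in enumerate(out))
    dist.destroy_process_group()

if __name__ == "__main__":
    with ProcessPoolExecutor(max_workers=WORLD_SIZE) as ex:
        list(ex.map(run_allgather, range(WORLD_SIZE)))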
garrett361 / profile_maybe_with_comms.py
Created April 15, 2024 20:50
pytorch profile with comms
"""
Minimal distributed profiling. Profiles compute and collective communications by default. Pass the
`--no-comms` flag to avoid collectives. Run as in
torchrun --nnodes=1 --nproc-per-node=2 profile_maybe_with_comms.py [--no-comms]
"""
import argparse
import os
from pathlib import Path
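
A rough sketch of the profiling loop such a script might contain, assuming CUDA/NCCL under torchrun; the matmul workload, step count, and trace filename are invented for illustration:

import argparse
import os

import torch
import torch.distributed as dist
from torch.profiler import ProfilerActivity, profile

def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--no-comms", action="store_true")
    args = parser.parse_args()

    dist.init_process_group("nccl")
    torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
    x = torch.randn(2048, 2048, device="cuda")
    with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
        for _ in range(5):
            y = x @ x
            if not args.no_comms:
                dist.all_reduce(y)  # the collective the flag toggles off
    prof.export_chrome_trace(f"trace_rank_{dist.get_rank()}.json")
    dist.destroy_process_group()

if __name__ == "__main__":
    main()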
garrett361 / profile_comms_compute_overlap.py
Created April 22, 2024 18:30
Minimally profile comms/compute overlap
"""
Minimal script for profiling compute/comms overlap. Run as in
torchrun --nnodes=1 --nproc-per-node=2 profile_comms_compute_overlap.py [--no-comms]
"""
import argparse
import os
from pathlib import Path
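
The overlap pattern presumably being measured is a collective issued with async_op=True followed by independent compute; a sketch under that assumption, with buffer sizes and iteration counts as placeholders:

import os

import torch
import torch.distributed as dist
from torch.profiler import ProfilerActivity, profile

def main() -> None:
    dist.init_process_group("nccl")
    torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
    comm_buf = torch.randn(1 << 24, device="cuda")  # placeholder size
    x = torch.randn(4096, 4096, device="cuda")
    with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
        for _ in range(5):
            # Issue the collective asynchronously ...
            handle = dist.all_reduce(comm_buf, async_op=True)
            # ... then queue independent compute that can overlap with it.
            for _ in range(4):
                x @ x
            handle.wait()
    prof.export_chrome_trace(f"overlap_trace_rank_{dist.get_rank()}.json")
    dist.destroy_process_group()

if __name__ == "__main__":
    main()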
garrett361 / launch_torchrun.sh
Last active May 16, 2024 12:41
torchrun Sunspot
#!/bin/bash -l
# Minimal torchrun-based launch script
# See https://docs.alcf.anl.gov/aurora/data-science/frameworks/pytorch for more recommendations.
# Usage:
#
# qsub -v SCRIPT_PATH=<your_script_path> [ARGS=...] [NPROC_PER_NODE=...] launch_torchrun.sh
garrett361 / reduce_scatter.py
Created May 29, 2024 21:05
Reduce scatter tests
"""
Raises ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY after the 29th iteration on an Intel Max 1550.
"""
import argparse
import os
import torch
import intel_extension_for_pytorch as ipex # noqa
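
A guess at the shape of the repro loop, assuming the ccl backend registered via oneccl_bindings_for_pytorch and a launcher such as mpiexec or torchrun; the tensor size and iteration count are placeholders, not the values that triggered the error:

import torch
import torch.distributed as dist
import intel_extension_for_pytorch as ipex  # noqa
import oneccl_bindings_for_pytorch  # noqa  # assumed: registers the ccl backend

def main() -> None:
    dist.init_process_group("ccl")
    rank = dist.get_rank()
    world_size = dist.get_world_size()
    torch.xpu.set_device(rank % torch.xpu.device_count())
    numel = 1 << 26  # placeholder size, not the gist's value
    for i in range(100):
        inputs = [torch.randn(numel, device="xpu") for _ in range(world_size)]
        output = torch.empty(numel, device="xpu")
        dist.reduce_scatter(output, inputs)
        torch.xpu.synchronize()
        if not rank:
            print(f"Completed iteration {i}", flush=True)
    dist.destroy_process_group()

if __name__ == "__main__":
    main()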
garrett361 / collectives.py
Last active June 7, 2024 13:54
Collectives timing
from abc import ABC, abstractmethod
from typing import Type
import torch
import torch.distributed as dist
if torch.cuda.is_available():
    accel = torch.cuda
    DEVICE_TYPE = "cuda"
    BACKEND = "nccl"
garrett361 / linear_model_fsdp_ddp.py
Last active June 10, 2024 14:37
fsdp and ddp min tests
"""
Basic FSDP/DDP applied to a linear model.
"""
import argparse
import os
import torch
import torch.distributed as dist
import torch.nn as nn
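
A minimal sketch of toggling between DDP and FSDP around a linear model under torchrun, assuming CUDA/NCCL; the --fsdp flag, model size, and single training step are illustrative:

import argparse
import os

import torch
import torch.distributed as dist
import torch.nn as nn
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
from torch.nn.parallel import DistributedDataParallel as DDP

def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--fsdp", action="store_true")  # hypothetical flag
    args = parser.parse_args()

    dist.init_process_group("nccl")
    torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
    model = nn.Linear(1024, 1024, device="cuda")
    wrapped = FSDP(model) if args.fsdp else DDP(model)

    # One illustrative training step.
    optim = torch.optim.SGD(wrapped.parameters(), lr=1e-3)
    loss = wrapped(torch.randn(8, 1024, device="cuda")).pow(2).mean()
    loss.backward()
    optim.step()
    dist.destroy_process_group()

if __name__ == "__main__":
    main()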
garrett361 / collective.py
Created June 4, 2024 18:22
Torch Profile Comms Compute Overlap
from abc import ABC, abstractmethod
import torch
import torch.distributed as dist
if torch.cuda.is_available():
    accel = torch.cuda
    DEVICE_TYPE = "cuda"
    BACKEND = "nccl"
else:
    # Assumed continuation: the companion gists use ipex/xpu on the non-CUDA path.
    accel = torch.xpu
    DEVICE_TYPE = "xpu"
    BACKEND = "ccl"
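
Given the title, this gist presumably drives such collectives under torch.profiler; a sketch of that driver, using the DEVICE_TYPE set above, with invented names (ProfilerActivity.XPU is an assumption that requires a recent PyTorch/ipex build):

import torch
import torch.distributed as dist
from torch.profiler import ProfilerActivity, profile

def profile_collective(n_iters: int = 5) -> None:
    # Assumes the process group and default device were set up elsewhere.
    t = torch.randn(1 << 24, device=DEVICE_TYPE)
    x = torch.randn(2048, 2048, device=DEVICE_TYPE)
    # ProfilerActivity.XPU exists only in sufficiently recent builds.
    accel_activity = (
        ProfilerActivity.CUDA if DEVICE_TYPE == "cuda" else ProfilerActivity.XPU
    )
    with profile(activities=[ProfilerActivity.CPU, accel_activity]) as prof:
        for _ in range(n_iters):
            handle = dist.all_reduce(t, async_op=True)
            x @ x  # independent compute that may overlap with the collective
            handle.wait()
    prof.export_chrome_trace(f"collective_trace_rank_{dist.get_rank()}.json")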
garrett361 / mp_torch_reduce_scatter.py
Created June 6, 2024 13:28
mp reduce scatter xpu
"""
Launch single-node reduce scatter with multiprocessing.
python3 mp_torch_reduce_scatter.py
"""
import os
import socket
from concurrent.futures import ProcessPoolExecutor
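
A sketch of how the multiprocessing launch might tie together with reduce_scatter on xpu; the ccl backend registration via oneccl_bindings_for_pytorch, the world size, and the tensor shapes are assumptions:

import os
import socket
from concurrent.futures import ProcessPoolExecutor

import torch
import torch.distributed as dist
import intel_extension_for_pytorch as ipex  # noqa
import oneccl_bindings_for_pytorch  # noqa  # assumed: registers the ccl backend

WORLD_SIZE = 2  # illustrative

def find_free_port() -> int:
    # Bind to port 0 so the OS picks an unused rendezvous port.
    with socket.socket() as s:
        s.bind(("", 0))
        return s.getsockname()[1]

def run_reduce_scatter(rank: int, port: int) -> None:
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = str(port)
    dist.init_process_group("ccl", rank=rank, world_size=WORLD_SIZE)
    device = f"xpu:{rank}"
    inputs = [torch.full((4,), float(r), device=device) for r in range(WORLD_SIZE)]
    output = torch.empty(4, device=device)
    dist.reduce_scatter(output, inputs)
    # Every rank holds identical inputs, so rank r receives r * WORLD_SIZE.
    assert output.eq(rank * WORLD_SIZE).all()
    dist.destroy_process_group()

if __name__ == "__main__":
    port = find_free_port()
    with ProcessPoolExecutor(max_workers=WORLD_SIZE) as ex:
        futures = [ex.submit(run_reduce_scatter, r, port) for r in range(WORLD_SIZE)]
        for f in futures:
            f.result()  # surface any worker exceptions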