View ray_neuron_training.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import time | |
import torch | |
from torch import nn | |
from ray.train import ScalingConfig | |
from ray.train.torch import TorchTrainer | |
from ray.train.torch.xla import TorchXLAConfig | |
from torchvision.datasets import mnist |
View precompute_latents.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Cluster: 16 x A10G GPUs | |
Command: python precompute_latents.py --subset_size 50 --mode debug | |
""" | |
import argparse | |
import io | |
import pandas as pd | |
import pyarrow.dataset as pds | |
import os |
View trainium-2node.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script is tested with the PR(https://github.com/ray-project/ray/pull/39130) from AWS team. | |
# It configures the required environment variables for Neuron XLA. | |
import os | |
import torch | |
import torch.nn as nn | |
import torch.optim as optim | |
import torch_xla.core.xla_model as xm | |
import torch_xla.distributed.xla_backend # noqa: F401 |
View ray_trainium_ddp.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import torch.optim as optim | |
import torch_xla.core.xla_model as xm | |
import torch_xla.distributed.xla_backend # noqa: F401 | |
from ray.train import ScalingConfig | |
from ray.train.torch import TorchTrainer, prepare_model | |
from ray.train.torch.xla import TorchXLAConfig |
View torch_ddp.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import tempfile | |
import torch | |
from torch import nn | |
from torch.nn.parallel import DistributedDataParallel | |
import ray | |
from ray.train import Checkpoint, CheckpointConfig, RunConfig, ScalingConfig | |
from ray.train.torch import TorchTrainer |
View requirements-repro-262.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
accelerate==0.19.0 | |
adal==1.2.7 | |
aiofiles==22.1.0 | |
aiohttp==3.8.5 | |
aiohttp-cors==0.7.0 | |
aiorwlock==1.3.0 | |
aiosignal==1.3.1 | |
aiosqlite==0.19.0 | |
alabaster==0.7.13 | |
anyio==3.7.1 |
View requirements-repro-nightly.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
accelerate==0.19.0 | |
adal==1.2.7 | |
aiofiles==22.1.0 | |
aiohttp==3.8.5 | |
aiohttp-cors==0.7.0 | |
aiorwlock==1.3.0 | |
aiosignal==1.3.1 | |
aiosqlite==0.19.0 | |
alabaster==0.7.13 | |
anyio==3.7.1 |
View requirements-release-test.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
about-time==4.2.1 | |
absl-py==1.4.0 | |
accelerate==0.19.0 | |
adal==1.2.7 | |
aim==3.17.5 | |
aim-ui==3.17.5 | |
aimrecords==0.0.7 | |
aimrocks==0.4.0 | |
aioboto3==11.2.0 | |
aiobotocore==2.5.0 |
View deepspeed_torch_trainer.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Minimal Example adapted from https://huggingface.co/docs/transformers/training | |
import deepspeed | |
import evaluate | |
import torch | |
from datasets import load_dataset | |
from deepspeed.accelerator import get_accelerator | |
from torch.utils.data import DataLoader | |
from tqdm import tqdm | |
from transformers import ( | |
AutoModelForSequenceClassification, |
View accelerate_torch_trainer.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import evaluate | |
import torch | |
from datasets import load_dataset | |
from torch.optim import AdamW | |
from torch.utils.data import DataLoader | |
from transformers import ( | |
AutoModelForSequenceClassification, | |
AutoTokenizer, | |
get_linear_schedule_with_warmup, | |
set_seed, |
NewerOlder