This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ray | |
from ray.train.torch import TorchTrainer | |
from ray.train import RunConfig, ScalingConfig | |
import time | |
def train_func(): | |
print("Training Starts") | |
time.sleep(100) | |
datasets = { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
task. | |
class_name: RayTrainWorker | |
actor_id: 9e6790a209b7c509e64301f305000000 | |
pid: 35979 | |
namespace: f205d617-4ee1-4fae-a76a-c3f2382b7527 | |
ip: 172.24.101.245 | |
The actor is dead because its worker process has died. Worker exit type: SYSTEM_ERROR Worker exit detail: Worker exits unexpectedly. Worker exits with an exit code None. Traceback (most recent call last): | |
File "python/ray/_raylet.pyx", line 1883, in ray._raylet.execute_task | |
File "python/ray/_raylet.pyx", line 1984, in ray._raylet.execute_task | |
File "python/ray/_raylet.pyx", line 1889, in ray._raylet.execute_task |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
from ray.train._internal.utils import get_address_and_port | |
import ray | |
import os | |
import torch | |
import torch.nn as nn | |
from torch.nn.parallel import DistributedDataParallel as DDP | |
import time |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ray | |
ray.init() | |
node_resources = {} | |
for node in ray.nodes(): | |
print(node, "\n") | |
node_resources[node["NodeID"]] = node["Resources"] | |
import ray |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# pylint: skip-file | |
import os | |
import torch | |
from torch import distributed as dist | |
from torchvision.models import resnet18 | |
from torchvision.datasets import FashionMNIST | |
from torchvision.transforms import ToTensor, Normalize, Compose | |
from torch.utils.data import DataLoader, DistributedSampler |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ray | |
import ray.train | |
import numpy as np | |
from ray.train.torch import TorchTrainer | |
from ray.train import ScalingConfig | |
from dataclasses import dataclass | |
@dataclass | |
class DummyDataclass: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import time | |
import torch | |
from torch import nn | |
from ray.train import ScalingConfig | |
from ray.train.torch import TorchTrainer | |
from ray.train.torch.xla import TorchXLAConfig | |
from torchvision.datasets import mnist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Cluster: 16 x A10G GPUs | |
Command: python precompute_latents.py --subset_size 50 --mode debug | |
""" | |
import argparse | |
import io | |
import pandas as pd | |
import pyarrow.dataset as pds | |
import os |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script is tested with the PR(https://github.com/ray-project/ray/pull/39130) from AWS team. | |
# It configures the required environment variables for Neuron XLA. | |
import os | |
import torch | |
import torch.nn as nn | |
import torch.optim as optim | |
import torch_xla.core.xla_model as xm | |
import torch_xla.distributed.xla_backend # noqa: F401 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import torch.optim as optim | |
import torch_xla.core.xla_model as xm | |
import torch_xla.distributed.xla_backend # noqa: F401 | |
from ray.train import ScalingConfig | |
from ray.train.torch import TorchTrainer, prepare_model | |
from ray.train.torch.xla import TorchXLAConfig |
NewerOlder