This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import argparse | |
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
import torch.optim as optim | |
from torchvision import datasets, transforms | |
from torch.optim.lr_scheduler import StepLR | |
from ray.train.torch import TorchTrainer | |
from ray.train import ScalingConfig, RunConfig |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ray.dag.input_node import InputNode | |
from ray.dag.output_node import MultiOutputNode | |
import ray | |
@ray.remote | |
class Worker: | |
def __init__(self, rank): | |
self.rank = rank | |
self.logs = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ray | |
from ray.train.torch import TorchTrainer | |
from ray.train import RunConfig, ScalingConfig | |
import time | |
def train_func(): | |
print("Training Starts") | |
time.sleep(100) | |
datasets = { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
task. | |
class_name: RayTrainWorker | |
actor_id: 9e6790a209b7c509e64301f305000000 | |
pid: 35979 | |
namespace: f205d617-4ee1-4fae-a76a-c3f2382b7527 | |
ip: 172.24.101.245 | |
The actor is dead because its worker process has died. Worker exit type: SYSTEM_ERROR Worker exit detail: Worker exits unexpectedly. Worker exits with an exit code None. Traceback (most recent call last): | |
File "python/ray/_raylet.pyx", line 1883, in ray._raylet.execute_task | |
File "python/ray/_raylet.pyx", line 1984, in ray._raylet.execute_task | |
File "python/ray/_raylet.pyx", line 1889, in ray._raylet.execute_task |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
from ray.train._internal.utils import get_address_and_port | |
import ray | |
import os | |
import torch | |
import torch.nn as nn | |
from torch.nn.parallel import DistributedDataParallel as DDP | |
import time |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ray | |
ray.init() | |
node_resources = {} | |
for node in ray.nodes(): | |
print(node, "\n") | |
node_resources[node["NodeID"]] = node["Resources"] | |
import ray |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# pylint: skip-file | |
import os | |
import torch | |
from torch import distributed as dist | |
from torchvision.models import resnet18 | |
from torchvision.datasets import FashionMNIST | |
from torchvision.transforms import ToTensor, Normalize, Compose | |
from torch.utils.data import DataLoader, DistributedSampler |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ray | |
import ray.train | |
import numpy as np | |
from ray.train.torch import TorchTrainer | |
from ray.train import ScalingConfig | |
from dataclasses import dataclass | |
@dataclass | |
class DummyDataclass: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import time | |
import torch | |
from torch import nn | |
from ray.train import ScalingConfig | |
from ray.train.torch import TorchTrainer | |
from ray.train.torch.xla import TorchXLAConfig | |
from torchvision.datasets import mnist |
NewerOlder