import torch
import ray.data

class DataGenerator:
    def __init__(self, permute_config):
        # Load the model onto the GPU once per actor so it is reused
        # across batches. Model is assumed to be defined elsewhere.
        device = torch.device("cuda")
        self.model = Model().to(device)
        self.config = permute_config

    def __call__(self, input):
        # The gist preview cuts off here; each permuted variant of the
        # input batch would be run through self.model.
        for test_input in self.permute(self.config, input):
            ...
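A minimal usage sketch, assuming a recent Ray Data release: map_batches constructs one DataGenerator per actor, so the GPU model is loaded once and reused across batches. The input items and permute_config are made up for illustration.

import ray

ds = ray.data.from_items([{"prompt": p} for p in range(100)])
results = ds.map_batches(
    DataGenerator,                          # one instance per actor
    fn_constructor_args=(permute_config,),  # permute_config assumed defined by the caller
    num_gpus=1,                             # reserve a GPU per actor for the model
    concurrency=2,                          # size of the actor pool
)
# Ray Data executes lazily; e.g. results.show(1) would trigger execution.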
import requests
from datetime import datetime

def get_issues_with_ray_in_title(repo_name):
    issues = []
    page = 1
    # headers = {'Authorization': 'token YOUR_GITHUB_TOKEN'}
    while True:
        issues_url = f"https://api.github.com/repos/{repo_name}/issues?page={page}&per_page=100&state=open"
        response = requests.get(issues_url)  # headers=headers)
        if response.status_code == 200:
            page_issues = response.json()
            if not page_issues:  # no more pages
                break
            # Sketched continuation; the gist preview ends at the status check.
            issues.extend(i for i in page_issues if "ray" in i["title"].lower())
            page += 1
        else:
            break
    return issues
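A hypothetical invocation; the target repo is assumed here (unauthenticated GitHub API calls are rate-limited, hence the commented-out token header above):

issues = get_issues_with_ray_in_title("vllm-project/vllm")
for issue in issues[:10]:
    print(issue["number"], issue["title"])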
import requests
from tabulate import tabulate
from datetime import datetime, timezone
# Retrieve open pull requests
open_url = 'https://api.github.com/repos/vllm-project/vllm/pulls?state=open&per_page=100'
# merged_url = 'https://api.github.com/repos/vllm-project/vllm/pulls?state=closed&per_page=100'
open_response = requests.get(open_url)
# merged_response = requests.get(merged_url)
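The fragment stops before the response is used. A hedged continuation sketch, consistent with the tabulate and datetime imports above (the dictionary keys are standard GitHub API fields):

prs = open_response.json()
now = datetime.now(timezone.utc)
rows = []
for pr in prs:
    # created_at is ISO 8601 with a trailing "Z"; normalize for fromisoformat.
    created = datetime.fromisoformat(pr["created_at"].replace("Z", "+00:00"))
    rows.append((pr["number"], pr["title"][:60], pr["user"]["login"], (now - created).days))
print(tabulate(rows, headers=["#", "Title", "Author", "Age (days)"]))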
diff --git a/examples/run.py b/examples/run.py
index 9a05434..d7946db 100644
--- a/examples/run.py
+++ b/examples/run.py
@@ -135,6 +135,12 @@ def parse_arguments(args=None):
                         choices=["hf", "nemo"],
                         help="The source of lora checkpoint.")
+    parser.add_argument(
+        '--run_profiling',
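The hunk header promises six added lines, but the preview cuts off after the flag name. A hypothetical completion, shown only to illustrate the shape of the change (defaults and help text are not in the source):

    # Hypothetical completion of the truncated hunk.
    parser.add_argument(
        '--run_profiling',
        default=False,
        action='store_true',
        help="Run extra profiling iterations and report latency.")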
# A unique identifier for the head node and workers of this cluster.
cluster_name: gpu-docker

# The maximum number of worker nodes to launch in addition to the head
# node.
max_workers: 2

# This executes all commands on all nodes in the docker container,
# and opens all the necessary ports to support the Ray cluster.
# Empty string means disabled.
docker:
    image: "rayproject/ray-ml:latest-gpu"
    container_name: "ray_nvidia_docker" # e.g. ray_docker
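With the file saved as, say, gpu-docker.yaml (filename assumed), the cluster is launched with the standard Ray autoscaler CLI, ray up gpu-docker.yaml, and torn down with ray down gpu-docker.yaml.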