Skip to content

Instantly share code, notes, and snippets.

import requests
from tabulate import tabulate
from datetime import datetime, timezone
# Retrieve the open pull requests for the vLLM repository (first page, 100 per page).
open_url = 'https://api.github.com/repos/vllm-project/vllm/pulls?state=open&per_page=100'
# merged_url = 'https://api.github.com/repos/vllm-project/vllm/pulls?state=closed&per_page=100'
# A timeout prevents the script from hanging forever on a stalled connection,
# and raise_for_status() fails fast on HTTP errors (e.g. GitHub rate limiting)
# instead of silently continuing with an error payload.
open_response = requests.get(open_url, timeout=30)
open_response.raise_for_status()
# merged_response = requests.get(merged_url)
diff --git a/examples/run.py b/examples/run.py
index 9a05434..d7946db 100644
--- a/examples/run.py
+++ b/examples/run.py
@@ -135,6 +135,12 @@ def parse_arguments(args=None):
choices=["hf", "nemo"],
help="The source of lora checkpoint.")
+ parser.add_argument(
+ '--run_profiling',
# A unique identifier for the head node and workers of this cluster.
cluster_name: gpu-docker
# The maximum number of worker nodes to launch in addition to the head.
max_workers: 2
docker:
image: "rayproject/ray-ml:latest-gpu"
container_name: "ray_nvidia_docker" # e.g. ray_docker
# A unique identifier for the head node and workers of this cluster.
cluster_name: gpu-docker
# The maximum number of worker nodes to launch in addition to the head
# node.
max_workers: 2
docker:
image: "rayproject/ray-ml:latest-gpu"
container_name: "ray_nvidia_docker" # e.g. ray_docker
# A unique identifier for the head node and workers of this cluster.
cluster_name: gpu-docker
# The maximum number of worker nodes to launch in addition to the head
# node.
max_workers: 2
# This executes all commands on all nodes in the docker container,
# and opens all the necessary ports to support the Ray cluster.
# Empty string means disabled.
# A unique identifier for the head node and workers of this cluster.
cluster_name: gpu-docker
# The maximum number of worker nodes to launch in addition to the head
# node.
max_workers: 2
# This executes all commands on all nodes in the docker container,
# and opens all the necessary ports to support the Ray cluster.
# Empty string means disabled.
# A unique identifier for the head node and workers of this cluster.
cluster_name: gpu-docker
# The maximum number of worker nodes to launch in addition to the head
# node.
max_workers: 2
# The autoscaler will scale up the cluster faster with higher upscaling speed.
# E.g., if the task requires adding more nodes then the autoscaler will gradually
# scale up the cluster in chunks of upscaling_speed*currently_running_nodes.
# A unique identifier for the head node and workers of this cluster.
cluster_name: gpu-docker
# The maximum number of worker nodes to launch in addition to the head
# node.
max_workers: 2
# The autoscaler will scale up the cluster faster with higher upscaling speed.
# E.g., if the task requires adding more nodes then the autoscaler will gradually
# scale up the cluster in chunks of upscaling_speed*currently_running_nodes.
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch
from PIL import Image
import requests
import boto3
import io
import time
import pandas as pd
import numpy as np
from ray.train.huggingface import HuggingFacePredictor
import torch
import torch.nn.functional as F
from torchmetrics import Accuracy
from torch.utils.data import DataLoader, Subset
from torchvision.datasets import MNIST
from torchvision import transforms
import pytorch_lightning as pl
from ray.air.config import ScalingConfig
from ray.train.lightning import LightningTrainer, LightningConfigBuilder