Last active
April 12, 2024 17:12
-
-
Save baughmann/9d7158c958570d0bbeb4afe0be7c2527 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reproduction script: asks vLLM to run its engine through Ray on a Ray
# cluster that was initialized by hand with a single GPU, and records the
# scheduling error that shows up afterwards.
#
# It's best to run this in a local notebook as two separate code blocks
#
#
# First, manually initialize the Ray cluster
#
import ray

# Shut down any Ray runtime that is already initialized in this process
# (e.g. from a previous notebook run) before calling ray.init() again.
if ray.is_initialized():
    ray.shutdown()

ray.init(
    num_gpus=1,
    include_dashboard=True,
    dashboard_host="0.0.0.0",
)

#
# Next, try to get vLLM to use the existing ray cluster
#
# Imported here rather than at the top because this part is meant to be run
# as the second notebook code block, after the cluster is up.
from vllm import AsyncEngineArgs, AsyncLLMEngine

args = AsyncEngineArgs(
    model="/your/model/path",  # replace this
    engine_use_ray=True,
    gpu_memory_utilization=0.5,
)
engine = AsyncLLMEngine.from_engine_args(args)
# after some time you will see:
# (autoscaler +6s) Error: No available node types can fulfill resource request {'CPU': 1.0, 'GPU': 0.5}. Add suitable node types to this cluster to resolve this issue.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment