# baughmann/init_ray_separately.py

## init_ray_separately.py
# It's best to run this in a local notebook as two separate code blocks
#
#
# First, manually initialize the Ray cluster
#
import ray

# If Ray is already initialized, shut it down before calling init again.
if ray.is_initialized():
    ray.shutdown()

# Keyword arguments for the manual Ray start-up: one GPU, dashboard enabled
# with its host set to 0.0.0.0.
_ray_options = {
    "num_gpus": 1,
    "include_dashboard": True,
    "dashboard_host": "0.0.0.0",
}
ray.init(**_ray_options)

#
# Second notebook cell: ask vLLM to build its async engine with Ray enabled,
# hoping it picks up the cluster started above.
#
from vllm import AsyncEngineArgs, AsyncLLMEngine

_engine_options = dict(
    model="/your/model/path",  # point this at a real model directory
    engine_use_ray=True,
    gpu_memory_utilization=0.5,
)
args = AsyncEngineArgs(**_engine_options)

engine = AsyncLLMEngine.from_engine_args(args)

# after some time you will see:
# (autoscaler +6s) Error: No available node types can fulfill resource request {'CPU': 1.0, 'GPU': 0.5}. Add suitable node types to this cluster to resolve this issue.

# NOTE(review): everything below repeats the same steps as the listing above
# and looks like a paste duplicate -- confirm whether it is still needed.
# It arrived tab-indented with its nesting flattened, which Python rejects
# with an IndentationError, so it is re-indented here as top-level code.
#
# It's best to run this in a local notebook as two separate code blocks
#
#
# First, manually initialize the Ray cluster
#
import ray

# If Ray is already initialized, shut it down before calling init again.
if ray.is_initialized():
    ray.shutdown()

ray.init(
    num_gpus=1,
    include_dashboard=True,
    dashboard_host="0.0.0.0",
)

#
# Next, try to get vLLM to use the existing ray cluster
#
from vllm import AsyncEngineArgs, AsyncLLMEngine

args = AsyncEngineArgs(
    model="/your/model/path",  # replace this
    engine_use_ray=True,
    gpu_memory_utilization=0.5,
)

engine = AsyncLLMEngine.from_engine_args(args)

# after some time you will see:
# (autoscaler +6s) Error: No available node types can fulfill resource request {'CPU': 1.0, 'GPU': 0.5}. Add suitable node types to this cluster to resolve this issue.