[Misc] Getting and passing ray runtime_env to workers (#22040)

Signed-off-by: Rui Qiao <ruisearch42@gmail.com>
This commit is contained in:
Rui Qiao
2025-08-01 23:54:40 -07:00
committed by GitHub
parent d3a6f2120b
commit 4ac8437352
6 changed files with 77 additions and 13 deletions

View File

@@ -36,6 +36,7 @@ from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
from vllm.logger import init_logger
from vllm.platforms import CpuArchEnum, current_platform
from vllm.plugins import load_general_plugins
from vllm.ray.lazy_utils import is_ray_initialized
from vllm.reasoning import ReasoningParserManager
from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET, MODELS_ON_S3
from vllm.transformers_utils.utils import check_gguf_file
@@ -1099,6 +1100,15 @@ class EngineArgs:
kv_sharing_fast_prefill=self.kv_sharing_fast_prefill,
)
ray_runtime_env = None
if is_ray_initialized():
# Ray Serve LLM calls `create_engine_config` in the context
# of a Ray task; therefore, we check is_ray_initialized()
# rather than is_in_ray_actor().
import ray
ray_runtime_env = ray.get_runtime_context().runtime_env
logger.info("Using ray runtime env: %s", ray_runtime_env)
# Get the current placement group if Ray is initialized and
# we are in a Ray actor. If so, then the placement group will be
# passed to spawned processes.
@@ -1211,6 +1221,7 @@ class EngineArgs:
max_parallel_loading_workers=self.max_parallel_loading_workers,
disable_custom_all_reduce=self.disable_custom_all_reduce,
ray_workers_use_nsight=self.ray_workers_use_nsight,
ray_runtime_env=ray_runtime_env,
placement_group=placement_group,
distributed_executor_backend=self.distributed_executor_backend,
worker_cls=self.worker_cls,