[Ray] Propagate third-party env vars to Ray workers via prefix matching (#34383)
Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com> Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
committed by
GitHub
parent
c5c38e152a
commit
c464b57374
@@ -123,6 +123,7 @@ steps:
|
||||
- tests/test_inputs.py
|
||||
- tests/test_outputs.py
|
||||
- tests/test_pooling_params.py
|
||||
- tests/test_ray_env.py
|
||||
- tests/multimodal
|
||||
- tests/renderers
|
||||
- tests/standalone_tests/lazy_imports.py
|
||||
@@ -136,6 +137,7 @@ steps:
|
||||
- pytest -v -s test_inputs.py
|
||||
- pytest -v -s test_outputs.py
|
||||
- pytest -v -s test_pooling_params.py
|
||||
- pytest -v -s test_ray_env.py
|
||||
- pytest -v -s -m 'cpu_test' multimodal
|
||||
- pytest -v -s renderers
|
||||
- pytest -v -s tokenizers_
|
||||
|
||||
194
tests/test_ray_env.py
Normal file
194
tests/test_ray_env.py
Normal file
@@ -0,0 +1,194 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""Tests for vllm.ray.ray_env — env var propagation to Ray workers."""
|
||||
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
|
||||
from vllm.ray.ray_env import get_env_vars_to_copy
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Default prefix matching
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDefaultPrefixes:
    """Variables matching a built-in prefix are selected with no extra setup.

    Exercises the LMCACHE_, NCCL_, UCX_, HF_ and HUGGING_FACE_ defaults.
    """

    def test_lmcache_prefix(self):
        with patch.dict(os.environ, {"LMCACHE_LOCAL_CPU": "True"}, clear=False):
            names = get_env_vars_to_copy()
            assert "LMCACHE_LOCAL_CPU" in names

    def test_nccl_prefix(self):
        with patch.dict(os.environ, {"NCCL_DEBUG": "INFO"}, clear=False):
            names = get_env_vars_to_copy()
            assert "NCCL_DEBUG" in names

    def test_ucx_prefix(self):
        with patch.dict(os.environ, {"UCX_TLS": "rc"}, clear=False):
            names = get_env_vars_to_copy()
            assert "UCX_TLS" in names

    def test_hf_token_via_prefix(self):
        # HF_TOKEN is picked up through the generic "HF_" prefix.
        with patch.dict(os.environ, {"HF_TOKEN": "secret"}, clear=False):
            names = get_env_vars_to_copy()
            assert "HF_TOKEN" in names

    def test_hugging_face_prefix(self):
        with patch.dict(
            os.environ, {"HUGGING_FACE_HUB_TOKEN": "secret"}, clear=False
        ):
            names = get_env_vars_to_copy()
            assert "HUGGING_FACE_HUB_TOKEN" in names
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Default extra vars
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDefaultExtraVars:
    """Individually named variables that are copied by default."""

    def test_pythonhashseed_in_result(self):
        """PYTHONHASHSEED is returned as a name to copy even when it is not
        set in os.environ."""
        names = get_env_vars_to_copy()
        assert "PYTHONHASHSEED" in names
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# User-supplied extensions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestUserExtensions:
    """Extra prefixes and extra names can be supplied through env vars."""

    def test_user_prefix(self):
        """A variable matching a user-supplied prefix is selected."""
        overrides = {
            "VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY": "MYLIB_",
            "MYLIB_FOO": "bar",
        }
        with patch.dict(os.environ, overrides, clear=False):
            names = get_env_vars_to_copy()
            assert "MYLIB_FOO" in names

    def test_user_extra_var(self):
        """A user-supplied name is added and PYTHONHASHSEED is still there."""
        overrides = {
            "VLLM_RAY_EXTRA_ENV_VARS_TO_COPY": "MY_SECRET",
            "MY_SECRET": "val",
        }
        with patch.dict(os.environ, overrides, clear=False):
            names = get_env_vars_to_copy()
            assert "MY_SECRET" in names
            assert "PYTHONHASHSEED" in names
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Exclusion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestExclusion:
    """Names in exclude_vars or RAY_NON_CARRY_OVER_ENV_VARS are dropped."""

    def test_exclude_vars(self):
        with patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"}, clear=False):
            names = get_env_vars_to_copy(exclude_vars={"CUDA_VISIBLE_DEVICES"})
            assert "CUDA_VISIBLE_DEVICES" not in names

    def test_non_carry_over_blacklist(self):
        # The variable matches a default prefix, so only the patched
        # blacklist can keep it out of the result.
        blacklist = patch(
            "vllm.ray.ray_env.RAY_NON_CARRY_OVER_ENV_VARS",
            {"LMCACHE_LOCAL_CPU"},
        )
        environ = patch.dict(os.environ, {"LMCACHE_LOCAL_CPU": "True"}, clear=False)
        with environ, blacklist:
            names = get_env_vars_to_copy()
            assert "LMCACHE_LOCAL_CPU" not in names
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# additional_vars (platform extension point)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAdditionalVars:
    """Names passed through additional_vars end up in the result."""

    def test_additional_vars_passthrough(self):
        with patch.dict(os.environ, {"CUSTOM_PLATFORM_VAR": "1"}, clear=False):
            names = get_env_vars_to_copy(additional_vars={"CUSTOM_PLATFORM_VAR"})
            assert "CUSTOM_PLATFORM_VAR" in names
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Edge cases
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestEdgeCases:
    """Strict prefix matching, CSV parsing, and additive user settings."""

    @patch.dict(os.environ, {"LMCACH_TYPO": "1"}, clear=False)
    def test_prefix_no_partial_match(self):
        """'LMCACH_' does not match the 'LMCACHE_' prefix."""
        result = get_env_vars_to_copy()
        assert "LMCACH_TYPO" not in result

    @patch.dict(
        os.environ,
        {
            "VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY": " MYLIB_ , OTHER_ ",
            # Variables that can only match if the padded tokens above are
            # stripped down to "MYLIB_" and "OTHER_".
            "MYLIB_FOO": "bar",
            "OTHER_BAZ": "qux",
        },
        clear=False,
    )
    def test_csv_whitespace_handling(self):
        """Whitespace around commas and tokens should be stripped."""
        result = get_env_vars_to_copy()
        assert isinstance(result, set)
        # Asserting on real matches makes the test fail if stripping breaks;
        # a bare type check would pass either way.
        assert "MYLIB_FOO" in result
        assert "OTHER_BAZ" in result

    @patch.dict(
        os.environ,
        {
            "VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY": "MYLIB_",
            "LMCACHE_BACKEND": "cpu",
            "NCCL_DEBUG": "INFO",
            "MYLIB_FOO": "bar",
        },
        clear=False,
    )
    def test_user_prefix_additive(self):
        """Setting VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY does NOT drop defaults."""
        result = get_env_vars_to_copy()
        # Built-in defaults still present
        assert "LMCACHE_BACKEND" in result
        assert "NCCL_DEBUG" in result
        # User addition also present
        assert "MYLIB_FOO" in result

    @patch.dict(
        os.environ,
        {
            "VLLM_RAY_EXTRA_ENV_VARS_TO_COPY": "MY_FLAG",
            "PYTHONHASHSEED": "42",
            "MY_FLAG": "1",
        },
        clear=False,
    )
    def test_user_extra_additive(self):
        """Setting VLLM_RAY_EXTRA_ENV_VARS_TO_COPY does NOT drop defaults."""
        result = get_env_vars_to_copy()
        # Built-in default still present
        assert "PYTHONHASHSEED" in result
        # User addition also present
        assert "MY_FLAG" in result
|
||||
15
vllm/envs.py
15
vllm/envs.py
@@ -139,6 +139,8 @@ if TYPE_CHECKING:
|
||||
VLLM_ENABLE_MOE_DP_CHUNK: bool = True
|
||||
VLLM_RANDOMIZE_DP_DUMMY_INPUTS: bool = False
|
||||
VLLM_RAY_DP_PACK_STRATEGY: Literal["strict", "fill", "span"] = "strict"
|
||||
VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY: str = ""
|
||||
VLLM_RAY_EXTRA_ENV_VARS_TO_COPY: str = ""
|
||||
VLLM_MARLIN_USE_ATOMIC_ADD: bool = False
|
||||
VLLM_MARLIN_INPUT_DTYPE: Literal["int8", "fp8"] | None = None
|
||||
VLLM_MXFP4_USE_MARLIN: bool | None = None
|
||||
@@ -1090,6 +1092,19 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
"VLLM_RAY_DP_PACK_STRATEGY": lambda: os.getenv(
|
||||
"VLLM_RAY_DP_PACK_STRATEGY", "strict"
|
||||
),
|
||||
# Comma-separated *additional* prefixes of env vars to copy from the
|
||||
# driver to Ray workers. These are merged with the built-in defaults
|
||||
# defined in ``vllm.ray.ray_env`` (VLLM_, etc.). Example: "MYLIB_,OTHER_"
|
||||
"VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY": lambda: os.getenv(
|
||||
"VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY", ""
|
||||
),
|
||||
# Comma-separated *additional* individual env var names to copy from
|
||||
# the driver to Ray workers. Merged with the built-in defaults
|
||||
# defined in ``vllm.ray.ray_env`` (PYTHONHASHSEED).
|
||||
# Example: "MY_SECRET,MY_FLAG"
|
||||
"VLLM_RAY_EXTRA_ENV_VARS_TO_COPY": lambda: os.getenv(
|
||||
"VLLM_RAY_EXTRA_ENV_VARS_TO_COPY", ""
|
||||
),
|
||||
# Whether to use S3 path for model loading in CI via RunAI Streamer
|
||||
"VLLM_CI_USE_S3": lambda: os.environ.get("VLLM_CI_USE_S3", "0") == "1",
|
||||
# Use model_redirect to redirect the model name to a local folder.
|
||||
|
||||
@@ -10,8 +10,7 @@ logger = init_logger(__name__)
|
||||
|
||||
CONFIG_HOME = envs.VLLM_CONFIG_ROOT
|
||||
|
||||
# This file contains a list of env vars that should not be copied
|
||||
# from the driver to the Ray workers.
|
||||
# Env vars that should NOT be copied from the driver to Ray workers.
|
||||
RAY_NON_CARRY_OVER_ENV_VARS_FILE = os.path.join(
|
||||
CONFIG_HOME, "ray_non_carry_over_env_vars.json"
|
||||
)
|
||||
@@ -29,51 +28,89 @@ except json.JSONDecodeError:
|
||||
)
|
||||
RAY_NON_CARRY_OVER_ENV_VARS = set()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Built-in defaults for env var propagation.
|
||||
# Users can add more via VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY and
|
||||
# VLLM_RAY_EXTRA_ENV_VARS_TO_COPY (additive, not replacing).
|
||||
# ---------------------------------------------------------------------------
|
||||
# Any variable in os.environ whose name starts with one of these prefixes
# is selected for copying.
DEFAULT_ENV_VAR_PREFIXES: set[str] = {
    "HF_",
    "HUGGING_FACE_",
    "LMCACHE_",
    "NCCL_",
    "UCX_",
    "VLLM_",
}

# Exact names that are always selected, whether or not they are currently set.
DEFAULT_EXTRA_ENV_VARS: set[str] = {"PYTHONHASHSEED"}
|
||||
|
||||
|
||||
def _parse_csv(value: str) -> set[str]:
|
||||
"""Split a comma-separated string into a set of stripped, non-empty tokens."""
|
||||
return {tok.strip() for tok in value.split(",") if tok.strip()}
|
||||
|
||||
|
||||
def get_env_vars_to_copy(
    exclude_vars: set[str] | None = None,
    additional_vars: set[str] | None = None,
    destination: str | None = None,
) -> set[str]:
    """Return the env var names to copy from the driver to Ray actors.

    The result is the union of:

    1. Env vars registered in ``vllm.envs.environment_variables``.
    2. Env vars in ``os.environ`` matching a prefix in
       ``DEFAULT_ENV_VAR_PREFIXES`` + ``VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY``.
    3. Individual names in ``DEFAULT_EXTRA_ENV_VARS`` +
       ``VLLM_RAY_EXTRA_ENV_VARS_TO_COPY``.
    4. Caller-supplied *additional_vars* (e.g. platform-specific).

    Minus any names in *exclude_vars* or ``RAY_NON_CARRY_OVER_ENV_VARS``.
    Exclusion is applied last, so a name that is both added and excluded
    is excluded.

    Args:
        exclude_vars: Env vars to exclude (e.g. worker-specific ones).
        additional_vars: Extra individual env var names to copy.  Useful
            for caller-specific vars (e.g. platform env vars).
        destination: Label used in log messages only.

    Returns:
        A set of env var *names*.  Names from sources 1, 3 and 4 are
        included whether or not they are currently set in ``os.environ``.
    """
    exclude = (exclude_vars or set()) | RAY_NON_CARRY_OVER_ENV_VARS

    # -- prefixes (built-in + user-supplied, additive) ----------------------
    prefixes = DEFAULT_ENV_VAR_PREFIXES | _parse_csv(
        envs.VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY
    )
    # str.startswith accepts a tuple, so one call tests every prefix.
    prefix_tuple = tuple(prefixes)

    # -- collect env var names ----------------------------------------------
    # 1. vLLM's registered env vars
    result = set(envs.environment_variables)
    # 2. Prefix-matched vars present in the current environment
    result |= {name for name in os.environ if name.startswith(prefix_tuple)}
    # 3. Individual extra vars (built-in + user-supplied, additive)
    result |= DEFAULT_EXTRA_ENV_VARS | _parse_csv(envs.VLLM_RAY_EXTRA_ENV_VARS_TO_COPY)
    # 4. Caller-supplied extra vars (e.g. platform-specific)
    result |= additional_vars or set()
    # 5. Exclude worker-specific and user-blacklisted vars
    result -= exclude

    # -- logging ------------------------------------------------------------
    dest = f" to {destination}" if destination else ""
    logger.info("Env var prefixes to copy: %s", sorted(prefixes))
    # Only names are logged, and only those actually set on the driver.
    logger.info(
        "Copying the following environment variables%s: %s",
        dest,
        sorted(v for v in result if v in os.environ),
    )
    if RAY_NON_CARRY_OVER_ENV_VARS:
        logger.info(
            "RAY_NON_CARRY_OVER_ENV_VARS from config: %s",
            RAY_NON_CARRY_OVER_ENV_VARS,
        )
    logger.info(
        "To exclude env vars from copying, add them to %s",
        RAY_NON_CARRY_OVER_ENV_VARS_FILE,
    )

    return result
|
||||
|
||||
@@ -73,9 +73,6 @@ class RayDistributedExecutor(Executor):
|
||||
"ROCR_VISIBLE_DEVICES",
|
||||
}
|
||||
|
||||
# These non-vLLM env vars are copied from the driver to workers
|
||||
ADDITIONAL_ENV_VARS = {"HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"}
|
||||
|
||||
uses_ray: bool = True
|
||||
supports_pp: bool = True
|
||||
|
||||
@@ -339,9 +336,7 @@ class RayDistributedExecutor(Executor):
|
||||
# Environment variables to copy from driver to workers
|
||||
env_vars_to_copy = get_env_vars_to_copy(
|
||||
exclude_vars=self.WORKER_SPECIFIC_ENV_VARS,
|
||||
additional_vars=set(current_platform.additional_env_vars).union(
|
||||
self.ADDITIONAL_ENV_VARS
|
||||
),
|
||||
additional_vars=set(current_platform.additional_env_vars),
|
||||
destination="workers",
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user