[Ray] Propagate third-party env vars to Ray workers via prefix matching (#34383)

Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com>
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
kourosh hakhamaneshi
2026-02-17 01:08:42 -08:00
committed by GitHub
parent c5c38e152a
commit c464b57374
5 changed files with 280 additions and 37 deletions

View File

@@ -123,6 +123,7 @@ steps:
- tests/test_inputs.py - tests/test_inputs.py
- tests/test_outputs.py - tests/test_outputs.py
- tests/test_pooling_params.py - tests/test_pooling_params.py
- tests/test_ray_env.py
- tests/multimodal - tests/multimodal
- tests/renderers - tests/renderers
- tests/standalone_tests/lazy_imports.py - tests/standalone_tests/lazy_imports.py
@@ -136,6 +137,7 @@ steps:
- pytest -v -s test_inputs.py - pytest -v -s test_inputs.py
- pytest -v -s test_outputs.py - pytest -v -s test_outputs.py
- pytest -v -s test_pooling_params.py - pytest -v -s test_pooling_params.py
- pytest -v -s test_ray_env.py
- pytest -v -s -m 'cpu_test' multimodal - pytest -v -s -m 'cpu_test' multimodal
- pytest -v -s renderers - pytest -v -s renderers
- pytest -v -s tokenizers_ - pytest -v -s tokenizers_

194
tests/test_ray_env.py Normal file
View File

@@ -0,0 +1,194 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Tests for vllm.ray.ray_env — env var propagation to Ray workers."""
import os
from unittest.mock import patch
from vllm.ray.ray_env import get_env_vars_to_copy
# ---------------------------------------------------------------------------
# Default prefix matching
# ---------------------------------------------------------------------------
class TestDefaultPrefixes:
    """Env vars whose names start with a built-in prefix are selected for
    copying without any user configuration.

    The cases below cover the LMCACHE_, NCCL_, UCX_, HF_ and HUGGING_FACE_
    prefixes.
    """

    def test_lmcache_prefix(self):
        with patch.dict(os.environ, {"LMCACHE_LOCAL_CPU": "True"}, clear=False):
            names = get_env_vars_to_copy()
        assert "LMCACHE_LOCAL_CPU" in names

    def test_nccl_prefix(self):
        with patch.dict(os.environ, {"NCCL_DEBUG": "INFO"}, clear=False):
            names = get_env_vars_to_copy()
        assert "NCCL_DEBUG" in names

    def test_ucx_prefix(self):
        with patch.dict(os.environ, {"UCX_TLS": "rc"}, clear=False):
            names = get_env_vars_to_copy()
        assert "UCX_TLS" in names

    def test_hf_token_via_prefix(self):
        with patch.dict(os.environ, {"HF_TOKEN": "secret"}, clear=False):
            names = get_env_vars_to_copy()
        assert "HF_TOKEN" in names

    def test_hugging_face_prefix(self):
        with patch.dict(
            os.environ, {"HUGGING_FACE_HUB_TOKEN": "secret"}, clear=False
        ):
            names = get_env_vars_to_copy()
        assert "HUGGING_FACE_HUB_TOKEN" in names
# ---------------------------------------------------------------------------
# Default extra vars
# ---------------------------------------------------------------------------
class TestDefaultExtraVars:
    """Names that are copied individually by default, i.e. the built-in
    entries that VLLM_RAY_EXTRA_ENV_VARS_TO_COPY is merged with."""

    def test_pythonhashseed_in_result(self):
        """PYTHONHASHSEED is always returned as a name to copy, whether or
        not the variable is actually set in os.environ."""
        names = get_env_vars_to_copy()
        assert "PYTHONHASHSEED" in names
# ---------------------------------------------------------------------------
# User-supplied extensions
# ---------------------------------------------------------------------------
class TestUserExtensions:
    """Extra prefixes and extra individual names can be supplied through
    environment variables at deploy time."""

    def test_user_prefix(self):
        """A user-supplied prefix selects matching env vars."""
        overrides = {
            "VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY": "MYLIB_",
            "MYLIB_FOO": "bar",
        }
        with patch.dict(os.environ, overrides, clear=False):
            names = get_env_vars_to_copy()
        assert "MYLIB_FOO" in names

    def test_user_extra_var(self):
        """A user-supplied name is added on top of the defaults, so
        PYTHONHASHSEED is still included."""
        overrides = {
            "VLLM_RAY_EXTRA_ENV_VARS_TO_COPY": "MY_SECRET",
            "MY_SECRET": "val",
        }
        with patch.dict(os.environ, overrides, clear=False):
            names = get_env_vars_to_copy()
        assert "MY_SECRET" in names
        assert "PYTHONHASHSEED" in names
# ---------------------------------------------------------------------------
# Exclusion
# ---------------------------------------------------------------------------
class TestExclusion:
    """Names listed in exclude_vars or RAY_NON_CARRY_OVER_ENV_VARS are
    removed from the result even when another rule would select them."""

    def test_exclude_vars(self):
        with patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"}, clear=False):
            names = get_env_vars_to_copy(exclude_vars={"CUDA_VISIBLE_DEVICES"})
        assert "CUDA_VISIBLE_DEVICES" not in names

    def test_non_carry_over_blacklist(self):
        # LMCACHE_LOCAL_CPU matches a built-in prefix, so only the patched
        # non-carry-over set can keep it out of the result.
        with (
            patch.dict(os.environ, {"LMCACHE_LOCAL_CPU": "True"}, clear=False),
            patch(
                "vllm.ray.ray_env.RAY_NON_CARRY_OVER_ENV_VARS",
                {"LMCACHE_LOCAL_CPU"},
            ),
        ):
            names = get_env_vars_to_copy()
        assert "LMCACHE_LOCAL_CPU" not in names
# ---------------------------------------------------------------------------
# additional_vars (platform extension point)
# ---------------------------------------------------------------------------
class TestAdditionalVars:
    """Callers can pass platform-specific names through additional_vars."""

    def test_additional_vars_passthrough(self):
        with patch.dict(os.environ, {"CUSTOM_PLATFORM_VAR": "1"}, clear=False):
            names = get_env_vars_to_copy(additional_vars={"CUSTOM_PLATFORM_VAR"})
        assert "CUSTOM_PLATFORM_VAR" in names
# ---------------------------------------------------------------------------
# Edge cases
# ---------------------------------------------------------------------------
class TestEdgeCases:
    """Edge cases: strict prefix matching, CSV whitespace handling, and the
    additive behaviour of the user-supplied settings."""

    @patch.dict(os.environ, {"LMCACH_TYPO": "1"}, clear=False)
    def test_prefix_no_partial_match(self):
        """'LMCACH_' does not match the 'LMCACHE_' prefix."""
        result = get_env_vars_to_copy()
        assert "LMCACH_TYPO" not in result

    @patch.dict(
        os.environ,
        {
            "VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY": " MYLIB_ , OTHER_ ",
            "MYLIB_FOO": "bar",
            "OTHER_BAR": "baz",
        },
        clear=False,
    )
    def test_csv_whitespace_handling(self):
        """Whitespace around commas and tokens should be stripped."""
        result = get_env_vars_to_copy()
        assert isinstance(result, set)
        # Env var names cannot match a prefix that still carries its padding,
        # so these only pass if each token was stripped before matching.
        assert "MYLIB_FOO" in result
        assert "OTHER_BAR" in result

    @patch.dict(
        os.environ,
        {
            "VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY": "MYLIB_",
            "LMCACHE_BACKEND": "cpu",
            "NCCL_DEBUG": "INFO",
            "MYLIB_FOO": "bar",
        },
        clear=False,
    )
    def test_user_prefix_additive(self):
        """Setting VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY does NOT drop defaults."""
        result = get_env_vars_to_copy()
        # Built-in defaults still present
        assert "LMCACHE_BACKEND" in result
        assert "NCCL_DEBUG" in result
        # User addition also present
        assert "MYLIB_FOO" in result

    @patch.dict(
        os.environ,
        {
            "VLLM_RAY_EXTRA_ENV_VARS_TO_COPY": "MY_FLAG",
            "PYTHONHASHSEED": "42",
            "MY_FLAG": "1",
        },
        clear=False,
    )
    def test_user_extra_additive(self):
        """Setting VLLM_RAY_EXTRA_ENV_VARS_TO_COPY does NOT drop defaults."""
        result = get_env_vars_to_copy()
        # Built-in default still present
        assert "PYTHONHASHSEED" in result
        # User addition also present
        assert "MY_FLAG" in result

View File

@@ -139,6 +139,8 @@ if TYPE_CHECKING:
VLLM_ENABLE_MOE_DP_CHUNK: bool = True VLLM_ENABLE_MOE_DP_CHUNK: bool = True
VLLM_RANDOMIZE_DP_DUMMY_INPUTS: bool = False VLLM_RANDOMIZE_DP_DUMMY_INPUTS: bool = False
VLLM_RAY_DP_PACK_STRATEGY: Literal["strict", "fill", "span"] = "strict" VLLM_RAY_DP_PACK_STRATEGY: Literal["strict", "fill", "span"] = "strict"
VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY: str = ""
VLLM_RAY_EXTRA_ENV_VARS_TO_COPY: str = ""
VLLM_MARLIN_USE_ATOMIC_ADD: bool = False VLLM_MARLIN_USE_ATOMIC_ADD: bool = False
VLLM_MARLIN_INPUT_DTYPE: Literal["int8", "fp8"] | None = None VLLM_MARLIN_INPUT_DTYPE: Literal["int8", "fp8"] | None = None
VLLM_MXFP4_USE_MARLIN: bool | None = None VLLM_MXFP4_USE_MARLIN: bool | None = None
@@ -1090,6 +1092,19 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_RAY_DP_PACK_STRATEGY": lambda: os.getenv( "VLLM_RAY_DP_PACK_STRATEGY": lambda: os.getenv(
"VLLM_RAY_DP_PACK_STRATEGY", "strict" "VLLM_RAY_DP_PACK_STRATEGY", "strict"
), ),
# Comma-separated *additional* prefixes of env vars to copy from the
# driver to Ray workers. These are merged with the built-in defaults
# defined in ``vllm.ray.ray_env`` (VLLM_, etc.). Example: "MYLIB_,OTHER_"
"VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY": lambda: os.getenv(
"VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY", ""
),
# Comma-separated *additional* individual env var names to copy from
# the driver to Ray workers. Merged with the built-in defaults
# defined in ``vllm.ray.ray_env`` (PYTHONHASHSEED).
# Example: "MY_SECRET,MY_FLAG"
"VLLM_RAY_EXTRA_ENV_VARS_TO_COPY": lambda: os.getenv(
"VLLM_RAY_EXTRA_ENV_VARS_TO_COPY", ""
),
# Whether to use S3 path for model loading in CI via RunAI Streamer # Whether to use S3 path for model loading in CI via RunAI Streamer
"VLLM_CI_USE_S3": lambda: os.environ.get("VLLM_CI_USE_S3", "0") == "1", "VLLM_CI_USE_S3": lambda: os.environ.get("VLLM_CI_USE_S3", "0") == "1",
# Use model_redirect to redirect the model name to a local folder. # Use model_redirect to redirect the model name to a local folder.

View File

@@ -10,8 +10,7 @@ logger = init_logger(__name__)
CONFIG_HOME = envs.VLLM_CONFIG_ROOT CONFIG_HOME = envs.VLLM_CONFIG_ROOT
# This file contains a list of env vars that should not be copied # Env vars that should NOT be copied from the driver to Ray workers.
# from the driver to the Ray workers.
RAY_NON_CARRY_OVER_ENV_VARS_FILE = os.path.join( RAY_NON_CARRY_OVER_ENV_VARS_FILE = os.path.join(
CONFIG_HOME, "ray_non_carry_over_env_vars.json" CONFIG_HOME, "ray_non_carry_over_env_vars.json"
) )
@@ -29,51 +28,89 @@ except json.JSONDecodeError:
) )
RAY_NON_CARRY_OVER_ENV_VARS = set() RAY_NON_CARRY_OVER_ENV_VARS = set()
# ---------------------------------------------------------------------------
# Built-in defaults for env var propagation.
# Users can add more via VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY and
# VLLM_RAY_EXTRA_ENV_VARS_TO_COPY (additive, not replacing).
# ---------------------------------------------------------------------------
# Name prefixes: any variable in the driver's os.environ whose name starts
# with one of these is selected for copying.
DEFAULT_ENV_VAR_PREFIXES: set[str] = {
"VLLM_",
"LMCACHE_",
"NCCL_",
"UCX_",
"HF_",
"HUGGING_FACE_",
}
# Exact names that are always added to the set of names to copy; unlike the
# prefixes above, they are not checked against os.environ first.
DEFAULT_EXTRA_ENV_VARS: set[str] = {
"PYTHONHASHSEED",
}
def _parse_csv(value: str) -> set[str]:
    """Split a comma-separated string into a set of stripped, non-empty tokens.

    Args:
        value: Comma-separated text, e.g. ``" MYLIB_ , OTHER_ "``.

    Returns:
        The distinct tokens with surrounding whitespace removed. Tokens that
        are empty or whitespace-only are dropped, so ``""`` yields an empty
        set.
    """
    # Strip each token once, then filter on the already-stripped value.
    stripped = (tok.strip() for tok in value.split(","))
    return {tok for tok in stripped if tok}
def get_env_vars_to_copy(
    exclude_vars: set[str] | None = None,
    additional_vars: set[str] | None = None,
    destination: str | None = None,
) -> set[str]:
    """Compute the env var names to forward from the driver to Ray actors.

    Names are collected from four sources and merged:

    1. Every name registered in ``vllm.envs.environment_variables``.
    2. Every name in ``os.environ`` that starts with a prefix from
       ``DEFAULT_ENV_VAR_PREFIXES`` or from the comma-separated
       ``VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY`` setting.
    3. The names in ``DEFAULT_EXTRA_ENV_VARS`` plus those listed in the
       comma-separated ``VLLM_RAY_EXTRA_ENV_VARS_TO_COPY`` setting.
    4. The caller's *additional_vars* (e.g. platform-specific names).

    Names in *exclude_vars* or ``RAY_NON_CARRY_OVER_ENV_VARS`` are then
    removed, so exclusion wins over every source above.

    Args:
        exclude_vars: Names that must not be copied (e.g. worker-specific).
        additional_vars: Extra individual names requested by the caller.
        destination: Label inserted into the log message; not used otherwise.

    Returns:
        The set of env var names to copy. Only source 2 is filtered by
        presence in ``os.environ``; names from the other sources are
        returned even when unset.
    """
    blocked = (exclude_vars or set()) | RAY_NON_CARRY_OVER_ENV_VARS

    # Built-in prefixes are always kept; user-supplied ones are added to them.
    prefixes = DEFAULT_ENV_VAR_PREFIXES | _parse_csv(
        envs.VLLM_RAY_EXTRA_ENV_VAR_PREFIXES_TO_COPY
    )
    # str.startswith accepts a tuple and matches if any member is a prefix.
    prefix_tuple = tuple(prefixes)

    registered = set(envs.environment_variables)
    prefix_matched = {name for name in os.environ if name.startswith(prefix_tuple)}
    extras = DEFAULT_EXTRA_ENV_VARS | _parse_csv(envs.VLLM_RAY_EXTRA_ENV_VARS_TO_COPY)
    requested = additional_vars or set()

    result = (registered | prefix_matched | extras | requested) - blocked

    # Only names are logged, never values.
    suffix = f" to {destination}" if destination else ""
    logger.info("Env var prefixes to copy: %s", sorted(prefixes))
    present = sorted(name for name in result if name in os.environ)
    logger.info(
        "Copying the following environment variables%s: %s",
        suffix,
        present,
    )
    if RAY_NON_CARRY_OVER_ENV_VARS:
        logger.info(
            "RAY_NON_CARRY_OVER_ENV_VARS from config: %s",
            RAY_NON_CARRY_OVER_ENV_VARS,
        )
    logger.info(
        "To exclude env vars from copying, add them to %s",
        RAY_NON_CARRY_OVER_ENV_VARS_FILE,
    )

    return result

View File

@@ -73,9 +73,6 @@ class RayDistributedExecutor(Executor):
"ROCR_VISIBLE_DEVICES", "ROCR_VISIBLE_DEVICES",
} }
# These non-vLLM env vars are copied from the driver to workers
ADDITIONAL_ENV_VARS = {"HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"}
uses_ray: bool = True uses_ray: bool = True
supports_pp: bool = True supports_pp: bool = True
@@ -339,9 +336,7 @@ class RayDistributedExecutor(Executor):
# Environment variables to copy from driver to workers # Environment variables to copy from driver to workers
env_vars_to_copy = get_env_vars_to_copy( env_vars_to_copy = get_env_vars_to_copy(
exclude_vars=self.WORKER_SPECIFIC_ENV_VARS, exclude_vars=self.WORKER_SPECIFIC_ENV_VARS,
additional_vars=set(current_platform.additional_env_vars).union( additional_vars=set(current_platform.additional_env_vars),
self.ADDITIONAL_ENV_VARS
),
destination="workers", destination="workers",
) )